1 /* 2 * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.misc; 27 28 import sun.misc.FloatConsts; 29 import sun.misc.DoubleConsts; 30 31 /** 32 * The class <code>FpUtils</code> contains static utility methods for 33 * manipulating and inspecting <code>float</code> and 34 * <code>double</code> floating-point numbers. These methods include 35 * functionality recommended or required by the IEEE 754 36 * floating-point standard. 37 * 38 * @author Joseph D. Darcy 39 */ 40 41 public class FpUtils { 42 /* 43 * The methods in this class are reasonably implemented using 44 * direct or indirect bit-level manipulation of floating-point 45 * values. However, having access to the IEEE 754 recommended 46 * functions would obviate the need for most programmers to engage 47 * in floating-point bit-twiddling. 48 * 49 * An IEEE 754 number has three fields, from most significant bit 50 * to to least significant, sign, exponent, and significand. 51 * 52 * msb lsb 53 * [sign|exponent| fractional_significand] 54 * 119 * ``100% Pure Java''(TM)," University of California, Berkeley 120 * technical report UCB//CSD-98-1009. 121 */ 122 123 /** 124 * Don't let anyone instantiate this class. 125 */ 126 private FpUtils() {} 127 128 // Constants used in scalb 129 static double twoToTheDoubleScaleUp = powerOfTwoD(512); 130 static double twoToTheDoubleScaleDown = powerOfTwoD(-512); 131 132 // Helper Methods 133 134 // The following helper methods are used in the implementation of 135 // the public recommended functions; they generally omit certain 136 // tests for exception cases. 137 138 /** 139 * Returns unbiased exponent of a <code>double</code>. 140 */ 141 public static int getExponent(double d){ 142 /* 143 * Bitwise convert d to long, mask out exponent bits, shift 144 * to the right and then subtract out double's bias adjust to 145 * get true exponent value. 146 */ 147 return (int)(((Double.doubleToRawLongBits(d) & DoubleConsts.EXP_BIT_MASK) >> 148 (DoubleConsts.SIGNIFICAND_WIDTH - 1)) - DoubleConsts.EXP_BIAS); 149 } 150 151 /** 152 * Returns unbiased exponent of a <code>float</code>. 153 */ 154 public static int getExponent(float f){ 155 /* 156 * Bitwise convert f to integer, mask out exponent bits, shift 157 * to the right and then subtract out float's bias adjust to 158 * get true exponent value 159 */ 160 return ((Float.floatToRawIntBits(f) & FloatConsts.EXP_BIT_MASK) >> 161 (FloatConsts.SIGNIFICAND_WIDTH - 1)) - FloatConsts.EXP_BIAS; 162 } 163 164 /** 165 * Returns a floating-point power of two in the normal range. 166 */ 167 static double powerOfTwoD(int n) { 168 assert(n >= DoubleConsts.MIN_EXPONENT && n <= DoubleConsts.MAX_EXPONENT); 169 return Double.longBitsToDouble((((long)n + (long)DoubleConsts.EXP_BIAS) << 170 (DoubleConsts.SIGNIFICAND_WIDTH-1)) 171 & DoubleConsts.EXP_BIT_MASK); 172 } 173 174 /** 175 * Returns a floating-point power of two in the normal range. 176 */ 177 static float powerOfTwoF(int n) { 178 assert(n >= FloatConsts.MIN_EXPONENT && n <= FloatConsts.MAX_EXPONENT); 179 return Float.intBitsToFloat(((n + FloatConsts.EXP_BIAS) << 180 (FloatConsts.SIGNIFICAND_WIDTH-1)) 181 & FloatConsts.EXP_BIT_MASK); 182 } 183 184 /** 185 * Returns the first floating-point argument with the sign of the 186 * second floating-point argument. Note that unlike the {@link 187 * FpUtils#copySign(double, double) copySign} method, this method 188 * does not require NaN <code>sign</code> arguments to be treated 189 * as positive values; implementations are permitted to treat some 190 * NaN arguments as positive and other NaN arguments as negative 191 * to allow greater performance. 192 * 193 * @param magnitude the parameter providing the magnitude of the result 194 * @param sign the parameter providing the sign of the result 195 * @return a value with the magnitude of <code>magnitude</code> 196 * and the sign of <code>sign</code>. 197 * @author Joseph D. Darcy 198 */ 199 public static double rawCopySign(double magnitude, double sign) { 200 return Double.longBitsToDouble((Double.doubleToRawLongBits(sign) & 201 (DoubleConsts.SIGN_BIT_MASK)) | 202 (Double.doubleToRawLongBits(magnitude) & 203 (DoubleConsts.EXP_BIT_MASK | 204 DoubleConsts.SIGNIF_BIT_MASK))); 205 } 206 207 /** 208 * Returns the first floating-point argument with the sign of the 209 * second floating-point argument. Note that unlike the {@link 210 * FpUtils#copySign(float, float) copySign} method, this method 211 * does not require NaN <code>sign</code> arguments to be treated 212 * as positive values; implementations are permitted to treat some 213 * NaN arguments as positive and other NaN arguments as negative 214 * to allow greater performance. 215 * 216 * @param magnitude the parameter providing the magnitude of the result 217 * @param sign the parameter providing the sign of the result 218 * @return a value with the magnitude of <code>magnitude</code> 219 * and the sign of <code>sign</code>. 220 * @author Joseph D. Darcy 221 */ 222 public static float rawCopySign(float magnitude, float sign) { 223 return Float.intBitsToFloat((Float.floatToRawIntBits(sign) & 224 (FloatConsts.SIGN_BIT_MASK)) | 225 (Float.floatToRawIntBits(magnitude) & 226 (FloatConsts.EXP_BIT_MASK | 227 FloatConsts.SIGNIF_BIT_MASK))); 228 } 229 230 /* ***************************************************************** */ 231 232 /** 233 * Returns <code>true</code> if the argument is a finite 234 * floating-point value; returns <code>false</code> otherwise (for 235 * NaN and infinity arguments). 236 * 237 * @param d the <code>double</code> value to be tested 238 * @return <code>true</code> if the argument is a finite 239 * floating-point value, <code>false</code> otherwise. 240 */ 241 public static boolean isFinite(double d) { 242 return Math.abs(d) <= DoubleConsts.MAX_VALUE; 243 } 244 245 /** 246 * Returns <code>true</code> if the argument is a finite 247 * floating-point value; returns <code>false</code> otherwise (for 248 * NaN and infinity arguments). 249 * 250 * @param f the <code>float</code> value to be tested 251 * @return <code>true</code> if the argument is a finite 252 * floating-point value, <code>false</code> otherwise. 253 */ 254 public static boolean isFinite(float f) { 255 return Math.abs(f) <= FloatConsts.MAX_VALUE; 256 } 257 258 /** 259 * Returns <code>true</code> if the specified number is infinitely 260 * large in magnitude, <code>false</code> otherwise. 261 * 262 * <p>Note that this method is equivalent to the {@link 263 * Double#isInfinite(double) Double.isInfinite} method; the 264 * functionality is included in this class for convenience. 265 * 266 * @param d the value to be tested. 267 * @return <code>true</code> if the value of the argument is positive 268 * infinity or negative infinity; <code>false</code> otherwise. 269 */ 270 public static boolean isInfinite(double d) { 271 return Double.isInfinite(d); 272 } 273 274 /** 275 * Returns <code>true</code> if the specified number is infinitely 276 * large in magnitude, <code>false</code> otherwise. 277 * 278 * <p>Note that this method is equivalent to the {@link 279 * Float#isInfinite(float) Float.isInfinite} method; the 280 * functionality is included in this class for convenience. 281 * 282 * @param f the value to be tested. 283 * @return <code>true</code> if the argument is positive infinity or 284 * negative infinity; <code>false</code> otherwise. 285 */ 286 public static boolean isInfinite(float f) { 287 return Float.isInfinite(f); 288 } 289 290 /** 291 * Returns <code>true</code> if the specified number is a 292 * Not-a-Number (NaN) value, <code>false</code> otherwise. 293 * 294 * <p>Note that this method is equivalent to the {@link 295 * Double#isNaN(double) Double.isNaN} method; the functionality is 296 * included in this class for convenience. 297 * 298 * @param d the value to be tested. 299 * @return <code>true</code> if the value of the argument is NaN; 300 * <code>false</code> otherwise. 301 */ 302 public static boolean isNaN(double d) { 303 return Double.isNaN(d); 304 } 305 306 /** 307 * Returns <code>true</code> if the specified number is a 308 * Not-a-Number (NaN) value, <code>false</code> otherwise. 309 * 310 * <p>Note that this method is equivalent to the {@link 311 * Float#isNaN(float) Float.isNaN} method; the functionality is 312 * included in this class for convenience. 313 * 314 * @param f the value to be tested. 315 * @return <code>true</code> if the argument is NaN; 316 * <code>false</code> otherwise. 317 */ 318 public static boolean isNaN(float f) { 319 return Float.isNaN(f); 320 } 321 322 /** 323 * Returns <code>true</code> if the unordered relation holds 324 * between the two arguments. When two floating-point values are 325 * unordered, one value is neither less than, equal to, nor 326 * greater than the other. For the unordered relation to be true, 327 * at least one argument must be a <code>NaN</code>. 328 * 329 * @param arg1 the first argument 330 * @param arg2 the second argument 331 * @return <code>true</code> if at least one argument is a NaN, 332 * <code>false</code> otherwise. 333 */ 334 public static boolean isUnordered(double arg1, double arg2) { 335 return isNaN(arg1) || isNaN(arg2); 336 } 337 338 /** 339 * Returns <code>true</code> if the unordered relation holds 340 * between the two arguments. When two floating-point values are 341 * unordered, one value is neither less than, equal to, nor 342 * greater than the other. For the unordered relation to be true, 343 * at least one argument must be a <code>NaN</code>. 344 * 345 * @param arg1 the first argument 346 * @param arg2 the second argument 347 * @return <code>true</code> if at least one argument is a NaN, 348 * <code>false</code> otherwise. 349 */ 350 public static boolean isUnordered(float arg1, float arg2) { 351 return isNaN(arg1) || isNaN(arg2); 352 } 353 354 /** 355 * Returns unbiased exponent of a <code>double</code>; for 356 * subnormal values, the number is treated as if it were 357 * normalized. That is for all finite, non-zero, positive numbers 358 * <i>x</i>, <code>scalb(<i>x</i>, -ilogb(<i>x</i>))</code> is 359 * always in the range [1, 2). 360 * <p> 361 * Special cases: 362 * <ul> 363 * <li> If the argument is NaN, then the result is 2<sup>30</sup>. 364 * <li> If the argument is infinite, then the result is 2<sup>28</sup>. 365 * <li> If the argument is zero, then the result is -(2<sup>28</sup>). 366 * </ul> 367 * 368 * @param d floating-point number whose exponent is to be extracted 369 * @return unbiased exponent of the argument. 370 * @author Joseph D. Darcy 371 */ 372 public static int ilogb(double d) { 373 int exponent = getExponent(d); 374 375 switch (exponent) { 376 case DoubleConsts.MAX_EXPONENT+1: // NaN or infinity 377 if( isNaN(d) ) 378 return (1<<30); // 2^30 379 else // infinite value 380 return (1<<28); // 2^28 381 // break; 382 383 case DoubleConsts.MIN_EXPONENT-1: // zero or subnormal 384 if(d == 0.0) { 385 return -(1<<28); // -(2^28) 386 } 387 else { 388 long transducer = Double.doubleToRawLongBits(d); 389 390 /* 391 * To avoid causing slow arithmetic on subnormals, 392 * the scaling to determine when d's significand 393 * is normalized is done in integer arithmetic. 394 * (there must be at least one "1" bit in the 395 * significand since zero has been screened out. 396 */ 397 398 // isolate significand bits 399 transducer &= DoubleConsts.SIGNIF_BIT_MASK; 400 assert(transducer != 0L); 401 402 // This loop is simple and functional. We might be 403 // able to do something more clever that was faster; 404 // e.g. number of leading zero detection on 405 // (transducer << (# exponent and sign bits). 406 while (transducer < 407 (1L << (DoubleConsts.SIGNIFICAND_WIDTH - 1))) { 408 transducer *= 2; 409 exponent--; 410 } 411 exponent++; 412 assert( exponent >= 413 DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1) && 414 exponent < DoubleConsts.MIN_EXPONENT); 415 return exponent; 416 } 417 // break; 418 419 default: 420 assert( exponent >= DoubleConsts.MIN_EXPONENT && 421 exponent <= DoubleConsts.MAX_EXPONENT); 422 return exponent; 423 // break; 424 } 425 } 426 427 /** 428 * Returns unbiased exponent of a <code>float</code>; for 429 * subnormal values, the number is treated as if it were 430 * normalized. That is for all finite, non-zero, positive numbers 431 * <i>x</i>, <code>scalb(<i>x</i>, -ilogb(<i>x</i>))</code> is 432 * always in the range [1, 2). 433 * <p> 434 * Special cases: 435 * <ul> 436 * <li> If the argument is NaN, then the result is 2<sup>30</sup>. 437 * <li> If the argument is infinite, then the result is 2<sup>28</sup>. 438 * <li> If the argument is zero, then the result is -(2<sup>28</sup>). 439 * </ul> 440 * 441 * @param f floating-point number whose exponent is to be extracted 442 * @return unbiased exponent of the argument. 443 * @author Joseph D. Darcy 444 */ 445 public static int ilogb(float f) { 446 int exponent = getExponent(f); 447 448 switch (exponent) { 449 case FloatConsts.MAX_EXPONENT+1: // NaN or infinity 450 if( isNaN(f) ) 451 return (1<<30); // 2^30 452 else // infinite value 453 return (1<<28); // 2^28 454 // break; 455 456 case FloatConsts.MIN_EXPONENT-1: // zero or subnormal 457 if(f == 0.0f) { 458 return -(1<<28); // -(2^28) 459 } 460 else { 461 int transducer = Float.floatToRawIntBits(f); 462 463 /* 464 * To avoid causing slow arithmetic on subnormals, 465 * the scaling to determine when f's significand 466 * is normalized is done in integer arithmetic. 467 * (there must be at least one "1" bit in the 468 * significand since zero has been screened out. 469 */ 470 471 // isolate significand bits 472 transducer &= FloatConsts.SIGNIF_BIT_MASK; 473 assert(transducer != 0); 474 475 // This loop is simple and functional. We might be 476 // able to do something more clever that was faster; 477 // e.g. number of leading zero detection on 478 // (transducer << (# exponent and sign bits). 479 while (transducer < 480 (1 << (FloatConsts.SIGNIFICAND_WIDTH - 1))) { 481 transducer *= 2; 482 exponent--; 483 } 484 exponent++; 485 assert( exponent >= 486 FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1) && 487 exponent < FloatConsts.MIN_EXPONENT); 488 return exponent; 489 } 490 // break; 491 492 default: 493 assert( exponent >= FloatConsts.MIN_EXPONENT && 494 exponent <= FloatConsts.MAX_EXPONENT); 495 return exponent; 496 // break; 497 } 498 } 499 500 501 /* 502 * The scalb operation should be reasonably fast; however, there 503 * are tradeoffs in writing a method to minimize the worst case 504 * performance and writing a method to minimize the time for 505 * expected common inputs. Some processors operate very slowly on 506 * subnormal operands, taking hundreds or thousands of cycles for 507 * one floating-point add or multiply as opposed to, say, four 508 * cycles for normal operands. For processors with very slow 509 * subnormal execution, scalb would be fastest if written entirely 510 * with integer operations; in other words, scalb would need to 511 * include the logic of performing correct rounding of subnormal 512 * values. This could be reasonably done in at most a few hundred 513 * cycles. However, this approach may penalize normal operations 514 * since at least the exponent of the floating-point argument must 515 * be examined. 516 * 517 * The approach taken in this implementation is a compromise. 518 * Floating-point multiplication is used to do most of the work; 519 * but knowingly multiplying by a subnormal scaling factor is 520 * avoided. However, the floating-point argument is not examined 521 * to see whether or not it is subnormal since subnormal inputs 522 * are assumed to be rare. At most three multiplies are needed to 523 * scale from the largest to smallest exponent ranges (scaling 524 * down, at most two multiplies are needed if subnormal scaling 525 * factors are allowed). However, in this implementation an 526 * expensive integer remainder operation is avoided at the cost of 527 * requiring five floating-point multiplies in the worst case, 528 * which should still be a performance win. 529 * 530 * If scaling of entire arrays is a concern, it would probably be 531 * more efficient to provide a double[] scalb(double[], int) 532 * version of scalb to avoid having to recompute the needed 533 * scaling factors for each floating-point value. 534 */ 535 536 /** 537 * Return <code>d</code> × 538 * 2<sup><code>scale_factor</code></sup> rounded as if performed 539 * by a single correctly rounded floating-point multiply to a 540 * member of the double value set. See <a 541 * href="http://java.sun.com/docs/books/jls/second_edition/html/typesValues.doc.html#9208">§4.2.3</a> 542 * of the <a href="http://java.sun.com/docs/books/jls/html/">Java 543 * Language Specification</a> for a discussion of floating-point 544 * value sets. If the exponent of the result is between the 545 * <code>double</code>'s minimum exponent and maximum exponent, 546 * the answer is calculated exactly. If the exponent of the 547 * result would be larger than <code>doubles</code>'s maximum 548 * exponent, an infinity is returned. Note that if the result is 549 * subnormal, precision may be lost; that is, when <code>scalb(x, 550 * n)</code> is subnormal, <code>scalb(scalb(x, n), -n)</code> may 551 * not equal <i>x</i>. When the result is non-NaN, the result has 552 * the same sign as <code>d</code>. 553 * 554 *<p> 555 * Special cases: 556 * <ul> 557 * <li> If the first argument is NaN, NaN is returned. 558 * <li> If the first argument is infinite, then an infinity of the 559 * same sign is returned. 560 * <li> If the first argument is zero, then a zero of the same 561 * sign is returned. 562 * </ul> 563 * 564 * @param d number to be scaled by a power of two. 565 * @param scale_factor power of 2 used to scale <code>d</code> 566 * @return <code>d * </code>2<sup><code>scale_factor</code></sup> 567 * @author Joseph D. Darcy 568 */ 569 public static double scalb(double d, int scale_factor) { 570 /* 571 * This method does not need to be declared strictfp to 572 * compute the same correct result on all platforms. When 573 * scaling up, it does not matter what order the 574 * multiply-store operations are done; the result will be 575 * finite or overflow regardless of the operation ordering. 576 * However, to get the correct result when scaling down, a 577 * particular ordering must be used. 578 * 579 * When scaling down, the multiply-store operations are 580 * sequenced so that it is not possible for two consecutive 581 * multiply-stores to return subnormal results. If one 582 * multiply-store result is subnormal, the next multiply will 583 * round it away to zero. This is done by first multiplying 584 * by 2 ^ (scale_factor % n) and then multiplying several 585 * times by by 2^n as needed where n is the exponent of number 586 * that is a covenient power of two. In this way, at most one 627 scale_increment = 512; 628 exp_delta = twoToTheDoubleScaleUp; 629 } 630 631 // Calculate (scale_factor % +/-512), 512 = 2^9, using 632 // technique from "Hacker's Delight" section 10-2. 633 int t = (scale_factor >> 9-1) >>> 32 - 9; 634 exp_adjust = ((scale_factor + t) & (512 -1)) - t; 635 636 d *= powerOfTwoD(exp_adjust); 637 scale_factor -= exp_adjust; 638 639 while(scale_factor != 0) { 640 d *= exp_delta; 641 scale_factor -= scale_increment; 642 } 643 return d; 644 } 645 646 /** 647 * Return <code>f </code>× 648 * 2<sup><code>scale_factor</code></sup> rounded as if performed 649 * by a single correctly rounded floating-point multiply to a 650 * member of the float value set. See <a 651 * href="http://java.sun.com/docs/books/jls/second_edition/html/typesValues.doc.html#9208">§4.2.3</a> 652 * of the <a href="http://java.sun.com/docs/books/jls/html/">Java 653 * Language Specification</a> for a discussion of floating-point 654 * value set. If the exponent of the result is between the 655 * <code>float</code>'s minimum exponent and maximum exponent, the 656 * answer is calculated exactly. If the exponent of the result 657 * would be larger than <code>float</code>'s maximum exponent, an 658 * infinity is returned. Note that if the result is subnormal, 659 * precision may be lost; that is, when <code>scalb(x, n)</code> 660 * is subnormal, <code>scalb(scalb(x, n), -n)</code> may not equal 661 * <i>x</i>. When the result is non-NaN, the result has the same 662 * sign as <code>f</code>. 663 * 664 *<p> 665 * Special cases: 666 * <ul> 667 * <li> If the first argument is NaN, NaN is returned. 668 * <li> If the first argument is infinite, then an infinity of the 669 * same sign is returned. 670 * <li> If the first argument is zero, then a zero of the same 671 * sign is returned. 672 * </ul> 673 * 674 * @param f number to be scaled by a power of two. 675 * @param scale_factor power of 2 used to scale <code>f</code> 676 * @return <code>f * </code>2<sup><code>scale_factor</code></sup> 677 * @author Joseph D. Darcy 678 */ 679 public static float scalb(float f, int scale_factor) { 680 // magnitude of a power of two so large that scaling a finite 681 // nonzero value by it would be guaranteed to over or 682 // underflow; due to rounding, scaling down takes takes an 683 // additional power of two which is reflected here 684 final int MAX_SCALE = FloatConsts.MAX_EXPONENT + -FloatConsts.MIN_EXPONENT + 685 FloatConsts.SIGNIFICAND_WIDTH + 1; 686 687 // Make sure scaling factor is in a reasonable range 688 scale_factor = Math.max(Math.min(scale_factor, MAX_SCALE), -MAX_SCALE); 689 690 /* 691 * Since + MAX_SCALE for float fits well within the double 692 * exponent range and + float -> double conversion is exact 693 * the multiplication below will be exact. Therefore, the 694 * rounding that occurs when the double product is cast to 695 * float will be the correctly rounded float result. Since 696 * all operations other than the final multiply will be exact, 697 * it is not necessary to declare this method strictfp. 698 */ 699 return (float)((double)f*powerOfTwoD(scale_factor)); 700 } 701 702 /** 703 * Returns the floating-point number adjacent to the first 704 * argument in the direction of the second argument. If both 705 * arguments compare as equal the second argument is returned. 706 * 707 * <p> 708 * Special cases: 709 * <ul> 710 * <li> If either argument is a NaN, then NaN is returned. 711 * 712 * <li> If both arguments are signed zeros, <code>direction</code> 713 * is returned unchanged (as implied by the requirement of 714 * returning the second argument if the arguments compare as 715 * equal). 716 * 717 * <li> If <code>start</code> is 718 * ±<code>Double.MIN_VALUE</code> and <code>direction</code> 719 * has a value such that the result should have a smaller 720 * magnitude, then a zero with the same sign as <code>start</code> 721 * is returned. 722 * 723 * <li> If <code>start</code> is infinite and 724 * <code>direction</code> has a value such that the result should 725 * have a smaller magnitude, <code>Double.MAX_VALUE</code> with the 726 * same sign as <code>start</code> is returned. 727 * 728 * <li> If <code>start</code> is equal to ± 729 * <code>Double.MAX_VALUE</code> and <code>direction</code> has a 730 * value such that the result should have a larger magnitude, an 731 * infinity with same sign as <code>start</code> is returned. 732 * </ul> 733 * 734 * @param start starting floating-point value 735 * @param direction value indicating which of 736 * <code>start</code>'s neighbors or <code>start</code> should 737 * be returned 738 * @return The floating-point number adjacent to <code>start</code> in the 739 * direction of <code>direction</code>. 740 * @author Joseph D. Darcy 741 */ 742 public static double nextAfter(double start, double direction) { 743 /* 744 * The cases: 745 * 746 * nextAfter(+infinity, 0) == MAX_VALUE 747 * nextAfter(+infinity, +infinity) == +infinity 748 * nextAfter(-infinity, 0) == -MAX_VALUE 749 * nextAfter(-infinity, -infinity) == -infinity 750 * 751 * are naturally handled without any additional testing 752 */ 753 754 // First check for NaN values 755 if (isNaN(start) || isNaN(direction)) { 756 // return a NaN derived from the input NaN(s) 757 return start + direction; 758 } else if (start == direction) { 759 return direction; 792 * explicitly. 793 */ 794 else 795 transducer = DoubleConsts.SIGN_BIT_MASK | 1L; 796 } 797 798 return Double.longBitsToDouble(transducer); 799 } 800 } 801 802 /** 803 * Returns the floating-point number adjacent to the first 804 * argument in the direction of the second argument. If both 805 * arguments compare as equal, the second argument is returned. 806 * 807 * <p> 808 * Special cases: 809 * <ul> 810 * <li> If either argument is a NaN, then NaN is returned. 811 * 812 * <li> If both arguments are signed zeros, a <code>float</code> 813 * zero with the same sign as <code>direction</code> is returned 814 * (as implied by the requirement of returning the second argument 815 * if the arguments compare as equal). 816 * 817 * <li> If <code>start</code> is 818 * ±<code>Float.MIN_VALUE</code> and <code>direction</code> 819 * has a value such that the result should have a smaller 820 * magnitude, then a zero with the same sign as <code>start</code> 821 * is returned. 822 * 823 * <li> If <code>start</code> is infinite and 824 * <code>direction</code> has a value such that the result should 825 * have a smaller magnitude, <code>Float.MAX_VALUE</code> with the 826 * same sign as <code>start</code> is returned. 827 * 828 * <li> If <code>start</code> is equal to ± 829 * <code>Float.MAX_VALUE</code> and <code>direction</code> has a 830 * value such that the result should have a larger magnitude, an 831 * infinity with same sign as <code>start</code> is returned. 832 * </ul> 833 * 834 * @param start starting floating-point value 835 * @param direction value indicating which of 836 * <code>start</code>'s neighbors or <code>start</code> should 837 * be returned 838 * @return The floating-point number adjacent to <code>start</code> in the 839 * direction of <code>direction</code>. 840 * @author Joseph D. Darcy 841 */ 842 public static float nextAfter(float start, double direction) { 843 /* 844 * The cases: 845 * 846 * nextAfter(+infinity, 0) == MAX_VALUE 847 * nextAfter(+infinity, +infinity) == +infinity 848 * nextAfter(-infinity, 0) == -MAX_VALUE 849 * nextAfter(-infinity, -infinity) == -infinity 850 * 851 * are naturally handled without any additional testing 852 */ 853 854 // First check for NaN values 855 if (isNaN(start) || isNaN(direction)) { 856 // return a NaN derived from the input NaN(s) 857 return start + (float)direction; 858 } else if (start == direction) { 859 return (float)direction; 883 else 884 if (transducer < 0 ) 885 ++transducer; 886 /* 887 * transducer==0, the result is -MIN_VALUE 888 * 889 * The transition from zero (implicitly 890 * positive) to the smallest negative 891 * signed magnitude value must be done 892 * explicitly. 893 */ 894 else 895 transducer = FloatConsts.SIGN_BIT_MASK | 1; 896 } 897 898 return Float.intBitsToFloat(transducer); 899 } 900 } 901 902 /** 903 * Returns the floating-point value adjacent to <code>d</code> in 904 * the direction of positive infinity. This method is 905 * semantically equivalent to <code>nextAfter(d, 906 * Double.POSITIVE_INFINITY)</code>; however, a <code>nextUp</code> 907 * implementation may run faster than its equivalent 908 * <code>nextAfter</code> call. 909 * 910 * <p>Special Cases: 911 * <ul> 912 * <li> If the argument is NaN, the result is NaN. 913 * 914 * <li> If the argument is positive infinity, the result is 915 * positive infinity. 916 * 917 * <li> If the argument is zero, the result is 918 * <code>Double.MIN_VALUE</code> 919 * 920 * </ul> 921 * 922 * @param d starting floating-point value 923 * @return The adjacent floating-point value closer to positive 924 * infinity. 925 * @author Joseph D. Darcy 926 */ 927 public static double nextUp(double d) { 928 if( isNaN(d) || d == Double.POSITIVE_INFINITY) 929 return d; 930 else { 931 d += 0.0d; 932 return Double.longBitsToDouble(Double.doubleToRawLongBits(d) + 933 ((d >= 0.0d)?+1L:-1L)); 934 } 935 } 936 937 /** 938 * Returns the floating-point value adjacent to <code>f</code> in 939 * the direction of positive infinity. This method is 940 * semantically equivalent to <code>nextAfter(f, 941 * Double.POSITIVE_INFINITY)</code>; however, a <code>nextUp</code> 942 * implementation may run faster than its equivalent 943 * <code>nextAfter</code> call. 944 * 945 * <p>Special Cases: 946 * <ul> 947 * <li> If the argument is NaN, the result is NaN. 948 * 949 * <li> If the argument is positive infinity, the result is 950 * positive infinity. 951 * 952 * <li> If the argument is zero, the result is 953 * <code>Float.MIN_VALUE</code> 954 * 955 * </ul> 956 * 957 * @param f starting floating-point value 958 * @return The adjacent floating-point value closer to positive 959 * infinity. 960 * @author Joseph D. Darcy 961 */ 962 public static float nextUp(float f) { 963 if( isNaN(f) || f == FloatConsts.POSITIVE_INFINITY) 964 return f; 965 else { 966 f += 0.0f; 967 return Float.intBitsToFloat(Float.floatToRawIntBits(f) + 968 ((f >= 0.0f)?+1:-1)); 969 } 970 } 971 972 /** 973 * Returns the floating-point value adjacent to <code>d</code> in 974 * the direction of negative infinity. This method is 975 * semantically equivalent to <code>nextAfter(d, 976 * Double.NEGATIVE_INFINITY)</code>; however, a 977 * <code>nextDown</code> implementation may run faster than its 978 * equivalent <code>nextAfter</code> call. 979 * 980 * <p>Special Cases: 981 * <ul> 982 * <li> If the argument is NaN, the result is NaN. 983 * 984 * <li> If the argument is negative infinity, the result is 985 * negative infinity. 986 * 987 * <li> If the argument is zero, the result is 988 * <code>-Double.MIN_VALUE</code> 989 * 990 * </ul> 991 * 992 * @param d starting floating-point value 993 * @return The adjacent floating-point value closer to negative 994 * infinity. 995 * @author Joseph D. Darcy 996 */ 997 public static double nextDown(double d) { 998 if( isNaN(d) || d == Double.NEGATIVE_INFINITY) 999 return d; 1000 else { 1001 if (d == 0.0) 1002 return -Double.MIN_VALUE; 1003 else 1004 return Double.longBitsToDouble(Double.doubleToRawLongBits(d) + 1005 ((d > 0.0d)?-1L:+1L)); 1006 } 1007 } 1008 1009 /** 1010 * Returns the floating-point value adjacent to <code>f</code> in 1011 * the direction of negative infinity. This method is 1012 * semantically equivalent to <code>nextAfter(f, 1013 * Float.NEGATIVE_INFINITY)</code>; however, a 1014 * <code>nextDown</code> implementation may run faster than its 1015 * equivalent <code>nextAfter</code> call. 1016 * 1017 * <p>Special Cases: 1018 * <ul> 1019 * <li> If the argument is NaN, the result is NaN. 1020 * 1021 * <li> If the argument is negative infinity, the result is 1022 * negative infinity. 1023 * 1024 * <li> If the argument is zero, the result is 1025 * <code>-Float.MIN_VALUE</code> 1026 * 1027 * </ul> 1028 * 1029 * @param f starting floating-point value 1030 * @return The adjacent floating-point value closer to negative 1031 * infinity. 1032 * @author Joseph D. Darcy 1033 */ 1034 public static double nextDown(float f) { 1035 if( isNaN(f) || f == Float.NEGATIVE_INFINITY) 1036 return f; 1037 else { 1038 if (f == 0.0f) 1039 return -Float.MIN_VALUE; 1040 else 1041 return Float.intBitsToFloat(Float.floatToRawIntBits(f) + 1042 ((f > 0.0f)?-1:+1)); 1043 } 1044 } 1045 1046 /** 1047 * Returns the first floating-point argument with the sign of the 1048 * second floating-point argument. For this method, a NaN 1049 * <code>sign</code> argument is always treated as if it were 1050 * positive. 1051 * 1052 * @param magnitude the parameter providing the magnitude of the result 1053 * @param sign the parameter providing the sign of the result 1054 * @return a value with the magnitude of <code>magnitude</code> 1055 * and the sign of <code>sign</code>. 1056 * @author Joseph D. Darcy 1057 * @since 1.5 1058 */ 1059 public static double copySign(double magnitude, double sign) { 1060 return rawCopySign(magnitude, (isNaN(sign)?1.0d:sign)); 1061 } 1062 1063 /** 1064 * Returns the first floating-point argument with the sign of the 1065 * second floating-point argument. For this method, a NaN 1066 * <code>sign</code> argument is always treated as if it were 1067 * positive. 1068 * 1069 * @param magnitude the parameter providing the magnitude of the result 1070 * @param sign the parameter providing the sign of the result 1071 * @return a value with the magnitude of <code>magnitude</code> 1072 * and the sign of <code>sign</code>. 1073 * @author Joseph D. Darcy 1074 */ 1075 public static float copySign(float magnitude, float sign) { 1076 return rawCopySign(magnitude, (isNaN(sign)?1.0f:sign)); 1077 } 1078 1079 /** 1080 * Returns the size of an ulp of the argument. An ulp of a 1081 * <code>double</code> value is the positive distance between this 1082 * floating-point value and the <code>double</code> value next 1083 * larger in magnitude. Note that for non-NaN <i>x</i>, 1084 * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>. 1085 * 1086 * <p>Special Cases: 1087 * <ul> 1088 * <li> If the argument is NaN, then the result is NaN. 1089 * <li> If the argument is positive or negative infinity, then the 1090 * result is positive infinity. 1091 * <li> If the argument is positive or negative zero, then the result is 1092 * <code>Double.MIN_VALUE</code>. 1093 * <li> If the argument is ±<code>Double.MAX_VALUE</code>, then 1094 * the result is equal to 2<sup>971</sup>. 1095 * </ul> 1096 * 1097 * @param d the floating-point value whose ulp is to be returned 1098 * @return the size of an ulp of the argument 1099 * @author Joseph D. Darcy 1100 * @since 1.5 1101 */ 1102 public static double ulp(double d) { 1103 int exp = getExponent(d); 1104 1105 switch(exp) { 1106 case DoubleConsts.MAX_EXPONENT+1: // NaN or infinity 1107 return Math.abs(d); 1108 // break; 1109 1110 case DoubleConsts.MIN_EXPONENT-1: // zero or subnormal 1111 return Double.MIN_VALUE; 1112 // break 1113 1114 default: 1115 assert exp <= DoubleConsts.MAX_EXPONENT && exp >= DoubleConsts.MIN_EXPONENT; 1116 1117 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x)) 1118 exp = exp - (DoubleConsts.SIGNIFICAND_WIDTH-1); 1119 if (exp >= DoubleConsts.MIN_EXPONENT) { 1120 return powerOfTwoD(exp); 1121 } 1122 else { 1123 // return a subnormal result; left shift integer 1124 // representation of Double.MIN_VALUE appropriate 1125 // number of positions 1126 return Double.longBitsToDouble(1L << 1127 (exp - (DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1)) )); 1128 } 1129 // break 1130 } 1131 } 1132 1133 /** 1134 * Returns the size of an ulp of the argument. An ulp of a 1135 * <code>float</code> value is the positive distance between this 1136 * floating-point value and the <code>float</code> value next 1137 * larger in magnitude. Note that for non-NaN <i>x</i>, 1138 * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>. 1139 * 1140 * <p>Special Cases: 1141 * <ul> 1142 * <li> If the argument is NaN, then the result is NaN. 1143 * <li> If the argument is positive or negative infinity, then the 1144 * result is positive infinity. 1145 * <li> If the argument is positive or negative zero, then the result is 1146 * <code>Float.MIN_VALUE</code>. 1147 * <li> If the argument is ±<code>Float.MAX_VALUE</code>, then 1148 * the result is equal to 2<sup>104</sup>. 1149 * </ul> 1150 * 1151 * @param f the floating-point value whose ulp is to be returned 1152 * @return the size of an ulp of the argument 1153 * @author Joseph D. Darcy 1154 * @since 1.5 1155 */ 1156 public static float ulp(float f) { 1157 int exp = getExponent(f); 1158 1159 switch(exp) { 1160 case FloatConsts.MAX_EXPONENT+1: // NaN or infinity 1161 return Math.abs(f); 1162 // break; 1163 1164 case FloatConsts.MIN_EXPONENT-1: // zero or subnormal 1165 return FloatConsts.MIN_VALUE; 1166 // break 1167 1168 default: 1169 assert exp <= FloatConsts.MAX_EXPONENT && exp >= FloatConsts.MIN_EXPONENT; 1170 1171 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x)) 1172 exp = exp - (FloatConsts.SIGNIFICAND_WIDTH-1); 1173 if (exp >= FloatConsts.MIN_EXPONENT) { 1174 return powerOfTwoF(exp); 1175 } 1176 else { 1177 // return a subnormal result; left shift integer 1178 // representation of FloatConsts.MIN_VALUE appropriate 1179 // number of positions 1180 return Float.intBitsToFloat(1 << 1181 (exp - (FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1)) )); 1182 } 1183 // break 1184 } 1185 } 1186 1187 /** 1188 * Returns the signum function of the argument; zero if the argument 1189 * is zero, 1.0 if the argument is greater than zero, -1.0 if the 1190 * argument is less than zero. 1191 * 1192 * <p>Special Cases: 1193 * <ul> 1194 * <li> If the argument is NaN, then the result is NaN. 1195 * <li> If the argument is positive zero or negative zero, then the 1196 * result is the same as the argument. 1197 * </ul> 1198 * 1199 * @param d the floating-point value whose signum is to be returned 1200 * @return the signum function of the argument 1201 * @author Joseph D. Darcy 1202 * @since 1.5 1203 */ | 1 /* 2 * Copyright (c) 2003, 2010 Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.misc; 27 28 import sun.misc.FloatConsts; 29 import sun.misc.DoubleConsts; 30 31 /** 32 * The class {@code FpUtils} contains static utility methods for 33 * manipulating and inspecting {@code float} and 34 * {@code double} floating-point numbers. These methods include 35 * functionality recommended or required by the IEEE 754 36 * floating-point standard. 37 * 38 * @author Joseph D. Darcy 39 */ 40 41 public class FpUtils { 42 /* 43 * The methods in this class are reasonably implemented using 44 * direct or indirect bit-level manipulation of floating-point 45 * values. However, having access to the IEEE 754 recommended 46 * functions would obviate the need for most programmers to engage 47 * in floating-point bit-twiddling. 48 * 49 * An IEEE 754 number has three fields, from most significant bit 50 * to to least significant, sign, exponent, and significand. 51 * 52 * msb lsb 53 * [sign|exponent| fractional_significand] 54 * 119 * ``100% Pure Java''(TM)," University of California, Berkeley 120 * technical report UCB//CSD-98-1009. 121 */ 122 123 /** 124 * Don't let anyone instantiate this class. 125 */ 126 private FpUtils() {} 127 128 // Constants used in scalb 129 static double twoToTheDoubleScaleUp = powerOfTwoD(512); 130 static double twoToTheDoubleScaleDown = powerOfTwoD(-512); 131 132 // Helper Methods 133 134 // The following helper methods are used in the implementation of 135 // the public recommended functions; they generally omit certain 136 // tests for exception cases. 137 138 /** 139 * Returns unbiased exponent of a {@code double}. 140 */ 141 public static int getExponent(double d){ 142 /* 143 * Bitwise convert d to long, mask out exponent bits, shift 144 * to the right and then subtract out double's bias adjust to 145 * get true exponent value. 146 */ 147 return (int)(((Double.doubleToRawLongBits(d) & DoubleConsts.EXP_BIT_MASK) >> 148 (DoubleConsts.SIGNIFICAND_WIDTH - 1)) - DoubleConsts.EXP_BIAS); 149 } 150 151 /** 152 * Returns unbiased exponent of a {@code float}. 153 */ 154 public static int getExponent(float f){ 155 /* 156 * Bitwise convert f to integer, mask out exponent bits, shift 157 * to the right and then subtract out float's bias adjust to 158 * get true exponent value 159 */ 160 return ((Float.floatToRawIntBits(f) & FloatConsts.EXP_BIT_MASK) >> 161 (FloatConsts.SIGNIFICAND_WIDTH - 1)) - FloatConsts.EXP_BIAS; 162 } 163 164 /** 165 * Returns a floating-point power of two in the normal range. 166 */ 167 static double powerOfTwoD(int n) { 168 assert(n >= DoubleConsts.MIN_EXPONENT && n <= DoubleConsts.MAX_EXPONENT); 169 return Double.longBitsToDouble((((long)n + (long)DoubleConsts.EXP_BIAS) << 170 (DoubleConsts.SIGNIFICAND_WIDTH-1)) 171 & DoubleConsts.EXP_BIT_MASK); 172 } 173 174 /** 175 * Returns a floating-point power of two in the normal range. 176 */ 177 static float powerOfTwoF(int n) { 178 assert(n >= FloatConsts.MIN_EXPONENT && n <= FloatConsts.MAX_EXPONENT); 179 return Float.intBitsToFloat(((n + FloatConsts.EXP_BIAS) << 180 (FloatConsts.SIGNIFICAND_WIDTH-1)) 181 & FloatConsts.EXP_BIT_MASK); 182 } 183 184 /** 185 * Returns the first floating-point argument with the sign of the 186 * second floating-point argument. Note that unlike the {@link 187 * FpUtils#copySign(double, double) copySign} method, this method 188 * does not require NaN {@code sign} arguments to be treated 189 * as positive values; implementations are permitted to treat some 190 * NaN arguments as positive and other NaN arguments as negative 191 * to allow greater performance. 192 * 193 * @param magnitude the parameter providing the magnitude of the result 194 * @param sign the parameter providing the sign of the result 195 * @return a value with the magnitude of {@code magnitude} 196 * and the sign of {@code sign}. 197 * @author Joseph D. Darcy 198 */ 199 public static double rawCopySign(double magnitude, double sign) { 200 return Double.longBitsToDouble((Double.doubleToRawLongBits(sign) & 201 (DoubleConsts.SIGN_BIT_MASK)) | 202 (Double.doubleToRawLongBits(magnitude) & 203 (DoubleConsts.EXP_BIT_MASK | 204 DoubleConsts.SIGNIF_BIT_MASK))); 205 } 206 207 /** 208 * Returns the first floating-point argument with the sign of the 209 * second floating-point argument. Note that unlike the {@link 210 * FpUtils#copySign(float, float) copySign} method, this method 211 * does not require NaN {@code sign} arguments to be treated 212 * as positive values; implementations are permitted to treat some 213 * NaN arguments as positive and other NaN arguments as negative 214 * to allow greater performance. 215 * 216 * @param magnitude the parameter providing the magnitude of the result 217 * @param sign the parameter providing the sign of the result 218 * @return a value with the magnitude of {@code magnitude} 219 * and the sign of {@code sign}. 220 * @author Joseph D. Darcy 221 */ 222 public static float rawCopySign(float magnitude, float sign) { 223 return Float.intBitsToFloat((Float.floatToRawIntBits(sign) & 224 (FloatConsts.SIGN_BIT_MASK)) | 225 (Float.floatToRawIntBits(magnitude) & 226 (FloatConsts.EXP_BIT_MASK | 227 FloatConsts.SIGNIF_BIT_MASK))); 228 } 229 230 /* ***************************************************************** */ 231 232 /** 233 * Returns {@code true} if the argument is a finite 234 * floating-point value; returns {@code false} otherwise (for 235 * NaN and infinity arguments). 236 * 237 * @param d the {@code double} value to be tested 238 * @return {@code true} if the argument is a finite 239 * floating-point value, {@code false} otherwise. 240 */ 241 public static boolean isFinite(double d) { 242 return Math.abs(d) <= DoubleConsts.MAX_VALUE; 243 } 244 245 /** 246 * Returns {@code true} if the argument is a finite 247 * floating-point value; returns {@code false} otherwise (for 248 * NaN and infinity arguments). 249 * 250 * @param f the {@code float} value to be tested 251 * @return {@code true} if the argument is a finite 252 * floating-point value, {@code false} otherwise. 253 */ 254 public static boolean isFinite(float f) { 255 return Math.abs(f) <= FloatConsts.MAX_VALUE; 256 } 257 258 /** 259 * Returns {@code true} if the specified number is infinitely 260 * large in magnitude, {@code false} otherwise. 261 * 262 * <p>Note that this method is equivalent to the {@link 263 * Double#isInfinite(double) Double.isInfinite} method; the 264 * functionality is included in this class for convenience. 265 * 266 * @param d the value to be tested. 267 * @return {@code true} if the value of the argument is positive 268 * infinity or negative infinity; {@code false} otherwise. 269 */ 270 public static boolean isInfinite(double d) { 271 return Double.isInfinite(d); 272 } 273 274 /** 275 * Returns {@code true} if the specified number is infinitely 276 * large in magnitude, {@code false} otherwise. 277 * 278 * <p>Note that this method is equivalent to the {@link 279 * Float#isInfinite(float) Float.isInfinite} method; the 280 * functionality is included in this class for convenience. 281 * 282 * @param f the value to be tested. 283 * @return {@code true} if the argument is positive infinity or 284 * negative infinity; {@code false} otherwise. 285 */ 286 public static boolean isInfinite(float f) { 287 return Float.isInfinite(f); 288 } 289 290 /** 291 * Returns {@code true} if the specified number is a 292 * Not-a-Number (NaN) value, {@code false} otherwise. 293 * 294 * <p>Note that this method is equivalent to the {@link 295 * Double#isNaN(double) Double.isNaN} method; the functionality is 296 * included in this class for convenience. 297 * 298 * @param d the value to be tested. 299 * @return {@code true} if the value of the argument is NaN; 300 * {@code false} otherwise. 301 */ 302 public static boolean isNaN(double d) { 303 return Double.isNaN(d); 304 } 305 306 /** 307 * Returns {@code true} if the specified number is a 308 * Not-a-Number (NaN) value, {@code false} otherwise. 309 * 310 * <p>Note that this method is equivalent to the {@link 311 * Float#isNaN(float) Float.isNaN} method; the functionality is 312 * included in this class for convenience. 313 * 314 * @param f the value to be tested. 315 * @return {@code true} if the argument is NaN; 316 * {@code false} otherwise. 317 */ 318 public static boolean isNaN(float f) { 319 return Float.isNaN(f); 320 } 321 322 /** 323 * Returns {@code true} if the unordered relation holds 324 * between the two arguments. When two floating-point values are 325 * unordered, one value is neither less than, equal to, nor 326 * greater than the other. For the unordered relation to be true, 327 * at least one argument must be a {@code NaN}. 328 * 329 * @param arg1 the first argument 330 * @param arg2 the second argument 331 * @return {@code true} if at least one argument is a NaN, 332 * {@code false} otherwise. 333 */ 334 public static boolean isUnordered(double arg1, double arg2) { 335 return isNaN(arg1) || isNaN(arg2); 336 } 337 338 /** 339 * Returns {@code true} if the unordered relation holds 340 * between the two arguments. When two floating-point values are 341 * unordered, one value is neither less than, equal to, nor 342 * greater than the other. For the unordered relation to be true, 343 * at least one argument must be a {@code NaN}. 344 * 345 * @param arg1 the first argument 346 * @param arg2 the second argument 347 * @return {@code true} if at least one argument is a NaN, 348 * {@code false} otherwise. 349 */ 350 public static boolean isUnordered(float arg1, float arg2) { 351 return isNaN(arg1) || isNaN(arg2); 352 } 353 354 /** 355 * Returns unbiased exponent of a {@code double}; for 356 * subnormal values, the number is treated as if it were 357 * normalized. That is for all finite, non-zero, positive numbers 358 * <i>x</i>, <code>scalb(<i>x</i>, -ilogb(<i>x</i>))</code> is 359 * always in the range [1, 2). 360 * <p> 361 * Special cases: 362 * <ul> 363 * <li> If the argument is NaN, then the result is 2<sup>30</sup>. 364 * <li> If the argument is infinite, then the result is 2<sup>28</sup>. 365 * <li> If the argument is zero, then the result is -(2<sup>28</sup>). 366 * </ul> 367 * 368 * @param d floating-point number whose exponent is to be extracted 369 * @return unbiased exponent of the argument. 370 * @author Joseph D. Darcy 371 */ 372 public static int ilogb(double d) { 373 int exponent = getExponent(d); 374 375 switch (exponent) { 376 case DoubleConsts.MAX_EXPONENT+1: // NaN or infinity 377 if( isNaN(d) ) 378 return (1<<30); // 2^30 379 else // infinite value 380 return (1<<28); // 2^28 381 382 case DoubleConsts.MIN_EXPONENT-1: // zero or subnormal 383 if(d == 0.0) { 384 return -(1<<28); // -(2^28) 385 } 386 else { 387 long transducer = Double.doubleToRawLongBits(d); 388 389 /* 390 * To avoid causing slow arithmetic on subnormals, 391 * the scaling to determine when d's significand 392 * is normalized is done in integer arithmetic. 393 * (there must be at least one "1" bit in the 394 * significand since zero has been screened out. 395 */ 396 397 // isolate significand bits 398 transducer &= DoubleConsts.SIGNIF_BIT_MASK; 399 assert(transducer != 0L); 400 401 // This loop is simple and functional. We might be 402 // able to do something more clever that was faster; 403 // e.g. number of leading zero detection on 404 // (transducer << (# exponent and sign bits). 405 while (transducer < 406 (1L << (DoubleConsts.SIGNIFICAND_WIDTH - 1))) { 407 transducer *= 2; 408 exponent--; 409 } 410 exponent++; 411 assert( exponent >= 412 DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1) && 413 exponent < DoubleConsts.MIN_EXPONENT); 414 return exponent; 415 } 416 417 default: 418 assert( exponent >= DoubleConsts.MIN_EXPONENT && 419 exponent <= DoubleConsts.MAX_EXPONENT); 420 return exponent; 421 } 422 } 423 424 /** 425 * Returns unbiased exponent of a {@code float}; for 426 * subnormal values, the number is treated as if it were 427 * normalized. That is for all finite, non-zero, positive numbers 428 * <i>x</i>, <code>scalb(<i>x</i>, -ilogb(<i>x</i>))</code> is 429 * always in the range [1, 2). 430 * <p> 431 * Special cases: 432 * <ul> 433 * <li> If the argument is NaN, then the result is 2<sup>30</sup>. 434 * <li> If the argument is infinite, then the result is 2<sup>28</sup>. 435 * <li> If the argument is zero, then the result is -(2<sup>28</sup>). 436 * </ul> 437 * 438 * @param f floating-point number whose exponent is to be extracted 439 * @return unbiased exponent of the argument. 440 * @author Joseph D. Darcy 441 */ 442 public static int ilogb(float f) { 443 int exponent = getExponent(f); 444 445 switch (exponent) { 446 case FloatConsts.MAX_EXPONENT+1: // NaN or infinity 447 if( isNaN(f) ) 448 return (1<<30); // 2^30 449 else // infinite value 450 return (1<<28); // 2^28 451 452 case FloatConsts.MIN_EXPONENT-1: // zero or subnormal 453 if(f == 0.0f) { 454 return -(1<<28); // -(2^28) 455 } 456 else { 457 int transducer = Float.floatToRawIntBits(f); 458 459 /* 460 * To avoid causing slow arithmetic on subnormals, 461 * the scaling to determine when f's significand 462 * is normalized is done in integer arithmetic. 463 * (there must be at least one "1" bit in the 464 * significand since zero has been screened out. 465 */ 466 467 // isolate significand bits 468 transducer &= FloatConsts.SIGNIF_BIT_MASK; 469 assert(transducer != 0); 470 471 // This loop is simple and functional. We might be 472 // able to do something more clever that was faster; 473 // e.g. number of leading zero detection on 474 // (transducer << (# exponent and sign bits). 475 while (transducer < 476 (1 << (FloatConsts.SIGNIFICAND_WIDTH - 1))) { 477 transducer *= 2; 478 exponent--; 479 } 480 exponent++; 481 assert( exponent >= 482 FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1) && 483 exponent < FloatConsts.MIN_EXPONENT); 484 return exponent; 485 } 486 487 default: 488 assert( exponent >= FloatConsts.MIN_EXPONENT && 489 exponent <= FloatConsts.MAX_EXPONENT); 490 return exponent; 491 } 492 } 493 494 495 /* 496 * The scalb operation should be reasonably fast; however, there 497 * are tradeoffs in writing a method to minimize the worst case 498 * performance and writing a method to minimize the time for 499 * expected common inputs. Some processors operate very slowly on 500 * subnormal operands, taking hundreds or thousands of cycles for 501 * one floating-point add or multiply as opposed to, say, four 502 * cycles for normal operands. For processors with very slow 503 * subnormal execution, scalb would be fastest if written entirely 504 * with integer operations; in other words, scalb would need to 505 * include the logic of performing correct rounding of subnormal 506 * values. This could be reasonably done in at most a few hundred 507 * cycles. However, this approach may penalize normal operations 508 * since at least the exponent of the floating-point argument must 509 * be examined. 510 * 511 * The approach taken in this implementation is a compromise. 512 * Floating-point multiplication is used to do most of the work; 513 * but knowingly multiplying by a subnormal scaling factor is 514 * avoided. However, the floating-point argument is not examined 515 * to see whether or not it is subnormal since subnormal inputs 516 * are assumed to be rare. At most three multiplies are needed to 517 * scale from the largest to smallest exponent ranges (scaling 518 * down, at most two multiplies are needed if subnormal scaling 519 * factors are allowed). However, in this implementation an 520 * expensive integer remainder operation is avoided at the cost of 521 * requiring five floating-point multiplies in the worst case, 522 * which should still be a performance win. 523 * 524 * If scaling of entire arrays is a concern, it would probably be 525 * more efficient to provide a double[] scalb(double[], int) 526 * version of scalb to avoid having to recompute the needed 527 * scaling factors for each floating-point value. 528 */ 529 530 /** 531 * Return {@code d} × 532 * 2<sup>{@code scale_factor}</sup> rounded as if performed 533 * by a single correctly rounded floating-point multiply to a 534 * member of the double value set. See <a 535 * href="http://java.sun.com/docs/books/jls/second_edition/html/typesValues.doc.html#9208">§4.2.3</a> 536 * of the <a href="http://java.sun.com/docs/books/jls/html/">Java 537 * Language Specification</a> for a discussion of floating-point 538 * value sets. If the exponent of the result is between the 539 * {@code double}'s minimum exponent and maximum exponent, 540 * the answer is calculated exactly. If the exponent of the 541 * result would be larger than {@code doubles}'s maximum 542 * exponent, an infinity is returned. Note that if the result is 543 * subnormal, precision may be lost; that is, when {@code scalb(x, 544 * n)} is subnormal, {@code scalb(scalb(x, n), -n)} may 545 * not equal <i>x</i>. When the result is non-NaN, the result has 546 * the same sign as {@code d}. 547 * 548 *<p> 549 * Special cases: 550 * <ul> 551 * <li> If the first argument is NaN, NaN is returned. 552 * <li> If the first argument is infinite, then an infinity of the 553 * same sign is returned. 554 * <li> If the first argument is zero, then a zero of the same 555 * sign is returned. 556 * </ul> 557 * 558 * @param d number to be scaled by a power of two. 559 * @param scale_factor power of 2 used to scale {@code d} 560 * @return {@code d * }2<sup>{@code scale_factor}</sup> 561 * @author Joseph D. Darcy 562 */ 563 public static double scalb(double d, int scale_factor) { 564 /* 565 * This method does not need to be declared strictfp to 566 * compute the same correct result on all platforms. When 567 * scaling up, it does not matter what order the 568 * multiply-store operations are done; the result will be 569 * finite or overflow regardless of the operation ordering. 570 * However, to get the correct result when scaling down, a 571 * particular ordering must be used. 572 * 573 * When scaling down, the multiply-store operations are 574 * sequenced so that it is not possible for two consecutive 575 * multiply-stores to return subnormal results. If one 576 * multiply-store result is subnormal, the next multiply will 577 * round it away to zero. This is done by first multiplying 578 * by 2 ^ (scale_factor % n) and then multiplying several 579 * times by by 2^n as needed where n is the exponent of number 580 * that is a covenient power of two. In this way, at most one 621 scale_increment = 512; 622 exp_delta = twoToTheDoubleScaleUp; 623 } 624 625 // Calculate (scale_factor % +/-512), 512 = 2^9, using 626 // technique from "Hacker's Delight" section 10-2. 627 int t = (scale_factor >> 9-1) >>> 32 - 9; 628 exp_adjust = ((scale_factor + t) & (512 -1)) - t; 629 630 d *= powerOfTwoD(exp_adjust); 631 scale_factor -= exp_adjust; 632 633 while(scale_factor != 0) { 634 d *= exp_delta; 635 scale_factor -= scale_increment; 636 } 637 return d; 638 } 639 640 /** 641 * Return {@code f} × 642 * 2<sup>{@code scale_factor}</sup> rounded as if performed 643 * by a single correctly rounded floating-point multiply to a 644 * member of the float value set. See <a 645 * href="http://java.sun.com/docs/books/jls/second_edition/html/typesValues.doc.html#9208">§4.2.3</a> 646 * of the <a href="http://java.sun.com/docs/books/jls/html/">Java 647 * Language Specification</a> for a discussion of floating-point 648 * value set. If the exponent of the result is between the 649 * {@code float}'s minimum exponent and maximum exponent, the 650 * answer is calculated exactly. If the exponent of the result 651 * would be larger than {@code float}'s maximum exponent, an 652 * infinity is returned. Note that if the result is subnormal, 653 * precision may be lost; that is, when {@code scalb(x, n)} 654 * is subnormal, {@code scalb(scalb(x, n), -n)} may not equal 655 * <i>x</i>. When the result is non-NaN, the result has the same 656 * sign as {@code f}. 657 * 658 *<p> 659 * Special cases: 660 * <ul> 661 * <li> If the first argument is NaN, NaN is returned. 662 * <li> If the first argument is infinite, then an infinity of the 663 * same sign is returned. 664 * <li> If the first argument is zero, then a zero of the same 665 * sign is returned. 666 * </ul> 667 * 668 * @param f number to be scaled by a power of two. 669 * @param scale_factor power of 2 used to scale {@code f} 670 * @return {@code f * }2<sup>{@code scale_factor}</sup> 671 * @author Joseph D. Darcy 672 */ 673 public static float scalb(float f, int scale_factor) { 674 // magnitude of a power of two so large that scaling a finite 675 // nonzero value by it would be guaranteed to over or 676 // underflow; due to rounding, scaling down takes takes an 677 // additional power of two which is reflected here 678 final int MAX_SCALE = FloatConsts.MAX_EXPONENT + -FloatConsts.MIN_EXPONENT + 679 FloatConsts.SIGNIFICAND_WIDTH + 1; 680 681 // Make sure scaling factor is in a reasonable range 682 scale_factor = Math.max(Math.min(scale_factor, MAX_SCALE), -MAX_SCALE); 683 684 /* 685 * Since + MAX_SCALE for float fits well within the double 686 * exponent range and + float -> double conversion is exact 687 * the multiplication below will be exact. Therefore, the 688 * rounding that occurs when the double product is cast to 689 * float will be the correctly rounded float result. Since 690 * all operations other than the final multiply will be exact, 691 * it is not necessary to declare this method strictfp. 692 */ 693 return (float)((double)f*powerOfTwoD(scale_factor)); 694 } 695 696 /** 697 * Returns the floating-point number adjacent to the first 698 * argument in the direction of the second argument. If both 699 * arguments compare as equal the second argument is returned. 700 * 701 * <p> 702 * Special cases: 703 * <ul> 704 * <li> If either argument is a NaN, then NaN is returned. 705 * 706 * <li> If both arguments are signed zeros, {@code direction} 707 * is returned unchanged (as implied by the requirement of 708 * returning the second argument if the arguments compare as 709 * equal). 710 * 711 * <li> If {@code start} is 712 * ±{@code Double.MIN_VALUE} and {@code direction} 713 * has a value such that the result should have a smaller 714 * magnitude, then a zero with the same sign as {@code start} 715 * is returned. 716 * 717 * <li> If {@code start} is infinite and 718 * {@code direction} has a value such that the result should 719 * have a smaller magnitude, {@code Double.MAX_VALUE} with the 720 * same sign as {@code start} is returned. 721 * 722 * <li> If {@code start} is equal to ± 723 * {@code Double.MAX_VALUE} and {@code direction} has a 724 * value such that the result should have a larger magnitude, an 725 * infinity with same sign as {@code start} is returned. 726 * </ul> 727 * 728 * @param start starting floating-point value 729 * @param direction value indicating which of 730 * {@code start}'s neighbors or {@code start} should 731 * be returned 732 * @return The floating-point number adjacent to {@code start} in the 733 * direction of {@code direction}. 734 * @author Joseph D. Darcy 735 */ 736 public static double nextAfter(double start, double direction) { 737 /* 738 * The cases: 739 * 740 * nextAfter(+infinity, 0) == MAX_VALUE 741 * nextAfter(+infinity, +infinity) == +infinity 742 * nextAfter(-infinity, 0) == -MAX_VALUE 743 * nextAfter(-infinity, -infinity) == -infinity 744 * 745 * are naturally handled without any additional testing 746 */ 747 748 // First check for NaN values 749 if (isNaN(start) || isNaN(direction)) { 750 // return a NaN derived from the input NaN(s) 751 return start + direction; 752 } else if (start == direction) { 753 return direction; 786 * explicitly. 787 */ 788 else 789 transducer = DoubleConsts.SIGN_BIT_MASK | 1L; 790 } 791 792 return Double.longBitsToDouble(transducer); 793 } 794 } 795 796 /** 797 * Returns the floating-point number adjacent to the first 798 * argument in the direction of the second argument. If both 799 * arguments compare as equal, the second argument is returned. 800 * 801 * <p> 802 * Special cases: 803 * <ul> 804 * <li> If either argument is a NaN, then NaN is returned. 805 * 806 * <li> If both arguments are signed zeros, a {@code float} 807 * zero with the same sign as {@code direction} is returned 808 * (as implied by the requirement of returning the second argument 809 * if the arguments compare as equal). 810 * 811 * <li> If {@code start} is 812 * ±{@code Float.MIN_VALUE} and {@code direction} 813 * has a value such that the result should have a smaller 814 * magnitude, then a zero with the same sign as {@code start} 815 * is returned. 816 * 817 * <li> If {@code start} is infinite and 818 * {@code direction} has a value such that the result should 819 * have a smaller magnitude, {@code Float.MAX_VALUE} with the 820 * same sign as {@code start} is returned. 821 * 822 * <li> If {@code start} is equal to ± 823 * {@code Float.MAX_VALUE} and {@code direction} has a 824 * value such that the result should have a larger magnitude, an 825 * infinity with same sign as {@code start} is returned. 826 * </ul> 827 * 828 * @param start starting floating-point value 829 * @param direction value indicating which of 830 * {@code start}'s neighbors or {@code start} should 831 * be returned 832 * @return The floating-point number adjacent to {@code start} in the 833 * direction of {@code direction}. 834 * @author Joseph D. Darcy 835 */ 836 public static float nextAfter(float start, double direction) { 837 /* 838 * The cases: 839 * 840 * nextAfter(+infinity, 0) == MAX_VALUE 841 * nextAfter(+infinity, +infinity) == +infinity 842 * nextAfter(-infinity, 0) == -MAX_VALUE 843 * nextAfter(-infinity, -infinity) == -infinity 844 * 845 * are naturally handled without any additional testing 846 */ 847 848 // First check for NaN values 849 if (isNaN(start) || isNaN(direction)) { 850 // return a NaN derived from the input NaN(s) 851 return start + (float)direction; 852 } else if (start == direction) { 853 return (float)direction; 877 else 878 if (transducer < 0 ) 879 ++transducer; 880 /* 881 * transducer==0, the result is -MIN_VALUE 882 * 883 * The transition from zero (implicitly 884 * positive) to the smallest negative 885 * signed magnitude value must be done 886 * explicitly. 887 */ 888 else 889 transducer = FloatConsts.SIGN_BIT_MASK | 1; 890 } 891 892 return Float.intBitsToFloat(transducer); 893 } 894 } 895 896 /** 897 * Returns the floating-point value adjacent to {@code d} in 898 * the direction of positive infinity. This method is 899 * semantically equivalent to {@code nextAfter(d, 900 * Double.POSITIVE_INFINITY)}; however, a {@code nextUp} 901 * implementation may run faster than its equivalent 902 * {@code nextAfter} call. 903 * 904 * <p>Special Cases: 905 * <ul> 906 * <li> If the argument is NaN, the result is NaN. 907 * 908 * <li> If the argument is positive infinity, the result is 909 * positive infinity. 910 * 911 * <li> If the argument is zero, the result is 912 * {@code Double.MIN_VALUE} 913 * 914 * </ul> 915 * 916 * @param d starting floating-point value 917 * @return The adjacent floating-point value closer to positive 918 * infinity. 919 * @author Joseph D. Darcy 920 */ 921 public static double nextUp(double d) { 922 if( isNaN(d) || d == Double.POSITIVE_INFINITY) 923 return d; 924 else { 925 d += 0.0d; 926 return Double.longBitsToDouble(Double.doubleToRawLongBits(d) + 927 ((d >= 0.0d)?+1L:-1L)); 928 } 929 } 930 931 /** 932 * Returns the floating-point value adjacent to {@code f} in 933 * the direction of positive infinity. This method is 934 * semantically equivalent to {@code nextAfter(f, 935 * Double.POSITIVE_INFINITY)}; however, a {@code nextUp} 936 * implementation may run faster than its equivalent 937 * {@code nextAfter} call. 938 * 939 * <p>Special Cases: 940 * <ul> 941 * <li> If the argument is NaN, the result is NaN. 942 * 943 * <li> If the argument is positive infinity, the result is 944 * positive infinity. 945 * 946 * <li> If the argument is zero, the result is 947 * {@code Float.MIN_VALUE} 948 * 949 * </ul> 950 * 951 * @param f starting floating-point value 952 * @return The adjacent floating-point value closer to positive 953 * infinity. 954 * @author Joseph D. Darcy 955 */ 956 public static float nextUp(float f) { 957 if( isNaN(f) || f == FloatConsts.POSITIVE_INFINITY) 958 return f; 959 else { 960 f += 0.0f; 961 return Float.intBitsToFloat(Float.floatToRawIntBits(f) + 962 ((f >= 0.0f)?+1:-1)); 963 } 964 } 965 966 /** 967 * Returns the floating-point value adjacent to {@code d} in 968 * the direction of negative infinity. This method is 969 * semantically equivalent to {@code nextAfter(d, 970 * Double.NEGATIVE_INFINITY)}; however, a 971 * {@code nextDown} implementation may run faster than its 972 * equivalent {@code nextAfter} call. 973 * 974 * <p>Special Cases: 975 * <ul> 976 * <li> If the argument is NaN, the result is NaN. 977 * 978 * <li> If the argument is negative infinity, the result is 979 * negative infinity. 980 * 981 * <li> If the argument is zero, the result is 982 * {@code -Double.MIN_VALUE} 983 * 984 * </ul> 985 * 986 * @param d starting floating-point value 987 * @return The adjacent floating-point value closer to negative 988 * infinity. 989 * @author Joseph D. Darcy 990 */ 991 public static double nextDown(double d) { 992 if( isNaN(d) || d == Double.NEGATIVE_INFINITY) 993 return d; 994 else { 995 if (d == 0.0) 996 return -Double.MIN_VALUE; 997 else 998 return Double.longBitsToDouble(Double.doubleToRawLongBits(d) + 999 ((d > 0.0d)?-1L:+1L)); 1000 } 1001 } 1002 1003 /** 1004 * Returns the floating-point value adjacent to {@code f} in 1005 * the direction of negative infinity. This method is 1006 * semantically equivalent to {@code nextAfter(f, 1007 * Float.NEGATIVE_INFINITY)}; however, a 1008 * {@code nextDown} implementation may run faster than its 1009 * equivalent {@code nextAfter} call. 1010 * 1011 * <p>Special Cases: 1012 * <ul> 1013 * <li> If the argument is NaN, the result is NaN. 1014 * 1015 * <li> If the argument is negative infinity, the result is 1016 * negative infinity. 1017 * 1018 * <li> If the argument is zero, the result is 1019 * {@code -Float.MIN_VALUE} 1020 * 1021 * </ul> 1022 * 1023 * @param f starting floating-point value 1024 * @return The adjacent floating-point value closer to negative 1025 * infinity. 1026 * @author Joseph D. Darcy 1027 */ 1028 public static double nextDown(float f) { 1029 if( isNaN(f) || f == Float.NEGATIVE_INFINITY) 1030 return f; 1031 else { 1032 if (f == 0.0f) 1033 return -Float.MIN_VALUE; 1034 else 1035 return Float.intBitsToFloat(Float.floatToRawIntBits(f) + 1036 ((f > 0.0f)?-1:+1)); 1037 } 1038 } 1039 1040 /** 1041 * Returns the first floating-point argument with the sign of the 1042 * second floating-point argument. For this method, a NaN 1043 * {@code sign} argument is always treated as if it were 1044 * positive. 1045 * 1046 * @param magnitude the parameter providing the magnitude of the result 1047 * @param sign the parameter providing the sign of the result 1048 * @return a value with the magnitude of {@code magnitude} 1049 * and the sign of {@code sign}. 1050 * @author Joseph D. Darcy 1051 * @since 1.5 1052 */ 1053 public static double copySign(double magnitude, double sign) { 1054 return rawCopySign(magnitude, (isNaN(sign)?1.0d:sign)); 1055 } 1056 1057 /** 1058 * Returns the first floating-point argument with the sign of the 1059 * second floating-point argument. For this method, a NaN 1060 * {@code sign} argument is always treated as if it were 1061 * positive. 1062 * 1063 * @param magnitude the parameter providing the magnitude of the result 1064 * @param sign the parameter providing the sign of the result 1065 * @return a value with the magnitude of {@code magnitude} 1066 * and the sign of {@code sign}. 1067 * @author Joseph D. Darcy 1068 */ 1069 public static float copySign(float magnitude, float sign) { 1070 return rawCopySign(magnitude, (isNaN(sign)?1.0f:sign)); 1071 } 1072 1073 /** 1074 * Returns the size of an ulp of the argument. An ulp of a 1075 * {@code double} value is the positive distance between this 1076 * floating-point value and the {@code double} value next 1077 * larger in magnitude. Note that for non-NaN <i>x</i>, 1078 * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>. 1079 * 1080 * <p>Special Cases: 1081 * <ul> 1082 * <li> If the argument is NaN, then the result is NaN. 1083 * <li> If the argument is positive or negative infinity, then the 1084 * result is positive infinity. 1085 * <li> If the argument is positive or negative zero, then the result is 1086 * {@code Double.MIN_VALUE}. 1087 * <li> If the argument is ±{@code Double.MAX_VALUE}, then 1088 * the result is equal to 2<sup>971</sup>. 1089 * </ul> 1090 * 1091 * @param d the floating-point value whose ulp is to be returned 1092 * @return the size of an ulp of the argument 1093 * @author Joseph D. Darcy 1094 * @since 1.5 1095 */ 1096 public static double ulp(double d) { 1097 int exp = getExponent(d); 1098 1099 switch(exp) { 1100 case DoubleConsts.MAX_EXPONENT+1: // NaN or infinity 1101 return Math.abs(d); 1102 1103 case DoubleConsts.MIN_EXPONENT-1: // zero or subnormal 1104 return Double.MIN_VALUE; 1105 1106 default: 1107 assert exp <= DoubleConsts.MAX_EXPONENT && exp >= DoubleConsts.MIN_EXPONENT; 1108 1109 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x)) 1110 exp = exp - (DoubleConsts.SIGNIFICAND_WIDTH-1); 1111 if (exp >= DoubleConsts.MIN_EXPONENT) { 1112 return powerOfTwoD(exp); 1113 } 1114 else { 1115 // return a subnormal result; left shift integer 1116 // representation of Double.MIN_VALUE appropriate 1117 // number of positions 1118 return Double.longBitsToDouble(1L << 1119 (exp - (DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1)) )); 1120 } 1121 } 1122 } 1123 1124 /** 1125 * Returns the size of an ulp of the argument. An ulp of a 1126 * {@code float} value is the positive distance between this 1127 * floating-point value and the {@code float} value next 1128 * larger in magnitude. Note that for non-NaN <i>x</i>, 1129 * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>. 1130 * 1131 * <p>Special Cases: 1132 * <ul> 1133 * <li> If the argument is NaN, then the result is NaN. 1134 * <li> If the argument is positive or negative infinity, then the 1135 * result is positive infinity. 1136 * <li> If the argument is positive or negative zero, then the result is 1137 * {@code Float.MIN_VALUE}. 1138 * <li> If the argument is ±{@code Float.MAX_VALUE}, then 1139 * the result is equal to 2<sup>104</sup>. 1140 * </ul> 1141 * 1142 * @param f the floating-point value whose ulp is to be returned 1143 * @return the size of an ulp of the argument 1144 * @author Joseph D. Darcy 1145 * @since 1.5 1146 */ 1147 public static float ulp(float f) { 1148 int exp = getExponent(f); 1149 1150 switch(exp) { 1151 case FloatConsts.MAX_EXPONENT+1: // NaN or infinity 1152 return Math.abs(f); 1153 1154 case FloatConsts.MIN_EXPONENT-1: // zero or subnormal 1155 return FloatConsts.MIN_VALUE; 1156 1157 default: 1158 assert exp <= FloatConsts.MAX_EXPONENT && exp >= FloatConsts.MIN_EXPONENT; 1159 1160 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x)) 1161 exp = exp - (FloatConsts.SIGNIFICAND_WIDTH-1); 1162 if (exp >= FloatConsts.MIN_EXPONENT) { 1163 return powerOfTwoF(exp); 1164 } 1165 else { 1166 // return a subnormal result; left shift integer 1167 // representation of FloatConsts.MIN_VALUE appropriate 1168 // number of positions 1169 return Float.intBitsToFloat(1 << 1170 (exp - (FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1)) )); 1171 } 1172 } 1173 } 1174 1175 /** 1176 * Returns the signum function of the argument; zero if the argument 1177 * is zero, 1.0 if the argument is greater than zero, -1.0 if the 1178 * argument is less than zero. 1179 * 1180 * <p>Special Cases: 1181 * <ul> 1182 * <li> If the argument is NaN, then the result is NaN. 1183 * <li> If the argument is positive zero or negative zero, then the 1184 * result is the same as the argument. 1185 * </ul> 1186 * 1187 * @param d the floating-point value whose signum is to be returned 1188 * @return the signum function of the argument 1189 * @author Joseph D. Darcy 1190 * @since 1.5 1191 */ |