src/share/classes/sun/misc/FpUtils.java

Print this page


   1 /*
   2  * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.misc;
  27 
  28 import sun.misc.FloatConsts;
  29 import sun.misc.DoubleConsts;
  30 
  31 /**
  32  * The class <code>FpUtils</code> contains static utility methods for
  33  * manipulating and inspecting <code>float</code> and
  34  * <code>double</code> floating-point numbers.  These methods include
  35  * functionality recommended or required by the IEEE 754
  36  * floating-point standard.
  37  *
  38  * @author Joseph D. Darcy
  39  */
  40 
  41 public class FpUtils {
  42     /*
  43      * The methods in this class are reasonably implemented using
  44      * direct or indirect bit-level manipulation of floating-point
  45      * values.  However, having access to the IEEE 754 recommended
  46      * functions would obviate the need for most programmers to engage
  47      * in floating-point bit-twiddling.
  48      *
  49      * An IEEE 754 number has three fields, from most significant bit
  50      * to to least significant, sign, exponent, and significand.
  51      *
  52      *  msb                                lsb
  53      * [sign|exponent|  fractional_significand]
  54      *


 119      * ``100% Pure Java''(TM)," University of California, Berkeley
 120      * technical report UCB//CSD-98-1009.
 121      */
 122 
 123     /**
 124      * Don't let anyone instantiate this class.
 125      */
 126     private FpUtils() {}
 127 
 128     // Constants used in scalb
 129     static double twoToTheDoubleScaleUp = powerOfTwoD(512);
 130     static double twoToTheDoubleScaleDown = powerOfTwoD(-512);
 131 
 132     // Helper Methods
 133 
 134     // The following helper methods are used in the implementation of
 135     // the public recommended functions; they generally omit certain
 136     // tests for exception cases.
 137 
 138     /**
 139      * Returns unbiased exponent of a <code>double</code>.
 140      */
 141     public static int getExponent(double d){
 142         /*
 143          * Bitwise convert d to long, mask out exponent bits, shift
 144          * to the right and then subtract out double's bias adjust to
 145          * get true exponent value.
 146          */
 147         return (int)(((Double.doubleToRawLongBits(d) & DoubleConsts.EXP_BIT_MASK) >>
 148                       (DoubleConsts.SIGNIFICAND_WIDTH - 1)) - DoubleConsts.EXP_BIAS);
 149     }
 150 
 151     /**
 152      * Returns unbiased exponent of a <code>float</code>.
 153      */
 154     public static int getExponent(float f){
 155         /*
 156          * Bitwise convert f to integer, mask out exponent bits, shift
 157          * to the right and then subtract out float's bias adjust to
 158          * get true exponent value
 159          */
 160         return ((Float.floatToRawIntBits(f) & FloatConsts.EXP_BIT_MASK) >>
 161                 (FloatConsts.SIGNIFICAND_WIDTH - 1)) - FloatConsts.EXP_BIAS;
 162     }
 163 
 164     /**
 165      * Returns a floating-point power of two in the normal range.
 166      */
 167     static double powerOfTwoD(int n) {
 168         assert(n >= DoubleConsts.MIN_EXPONENT && n <= DoubleConsts.MAX_EXPONENT);
 169         return Double.longBitsToDouble((((long)n + (long)DoubleConsts.EXP_BIAS) <<
 170                                         (DoubleConsts.SIGNIFICAND_WIDTH-1))
 171                                        & DoubleConsts.EXP_BIT_MASK);
 172     }
 173 
 174     /**
 175      * Returns a floating-point power of two in the normal range.
 176      */
 177     static float powerOfTwoF(int n) {
 178         assert(n >= FloatConsts.MIN_EXPONENT && n <= FloatConsts.MAX_EXPONENT);
 179         return Float.intBitsToFloat(((n + FloatConsts.EXP_BIAS) <<
 180                                      (FloatConsts.SIGNIFICAND_WIDTH-1))
 181                                     & FloatConsts.EXP_BIT_MASK);
 182     }
 183 
 184     /**
 185      * Returns the first floating-point argument with the sign of the
 186      * second floating-point argument.  Note that unlike the {@link
 187      * FpUtils#copySign(double, double) copySign} method, this method
 188      * does not require NaN <code>sign</code> arguments to be treated
 189      * as positive values; implementations are permitted to treat some
 190      * NaN arguments as positive and other NaN arguments as negative
 191      * to allow greater performance.
 192      *
 193      * @param magnitude  the parameter providing the magnitude of the result
 194      * @param sign   the parameter providing the sign of the result
 195      * @return a value with the magnitude of <code>magnitude</code>
 196      * and the sign of <code>sign</code>.
 197      * @author Joseph D. Darcy
 198      */
 199     public static double rawCopySign(double magnitude, double sign) {
 200         return Double.longBitsToDouble((Double.doubleToRawLongBits(sign) &
 201                                         (DoubleConsts.SIGN_BIT_MASK)) |
 202                                        (Double.doubleToRawLongBits(magnitude) &
 203                                         (DoubleConsts.EXP_BIT_MASK |
 204                                          DoubleConsts.SIGNIF_BIT_MASK)));
 205     }
 206 
 207     /**
 208      * Returns the first floating-point argument with the sign of the
 209      * second floating-point argument.  Note that unlike the {@link
 210      * FpUtils#copySign(float, float) copySign} method, this method
 211      * does not require NaN <code>sign</code> arguments to be treated
 212      * as positive values; implementations are permitted to treat some
 213      * NaN arguments as positive and other NaN arguments as negative
 214      * to allow greater performance.
 215      *
 216      * @param magnitude  the parameter providing the magnitude of the result
 217      * @param sign   the parameter providing the sign of the result
 218      * @return a value with the magnitude of <code>magnitude</code>
 219      * and the sign of <code>sign</code>.
 220      * @author Joseph D. Darcy
 221      */
 222     public static float rawCopySign(float magnitude, float sign) {
 223         return Float.intBitsToFloat((Float.floatToRawIntBits(sign) &
 224                                      (FloatConsts.SIGN_BIT_MASK)) |
 225                                     (Float.floatToRawIntBits(magnitude) &
 226                                      (FloatConsts.EXP_BIT_MASK |
 227                                       FloatConsts.SIGNIF_BIT_MASK)));
 228     }
 229 
 230     /* ***************************************************************** */
 231 
 232     /**
 233      * Returns <code>true</code> if the argument is a finite
 234      * floating-point value; returns <code>false</code> otherwise (for
 235      * NaN and infinity arguments).
 236      *
 237      * @param d the <code>double</code> value to be tested
 238      * @return <code>true</code> if the argument is a finite
 239      * floating-point value, <code>false</code> otherwise.
 240      */
 241     public static boolean isFinite(double d) {
 242         return Math.abs(d) <= DoubleConsts.MAX_VALUE;
 243     }
 244 
 245     /**
 246      * Returns <code>true</code> if the argument is a finite
 247      * floating-point value; returns <code>false</code> otherwise (for
 248      * NaN and infinity arguments).
 249      *
 250      * @param f the <code>float</code> value to be tested
 251      * @return <code>true</code> if the argument is a finite
 252      * floating-point value, <code>false</code> otherwise.
 253      */
 254      public static boolean isFinite(float f) {
 255         return Math.abs(f) <= FloatConsts.MAX_VALUE;
 256     }
 257 
 258     /**
 259      * Returns <code>true</code> if the specified number is infinitely
 260      * large in magnitude, <code>false</code> otherwise.
 261      *
 262      * <p>Note that this method is equivalent to the {@link
 263      * Double#isInfinite(double) Double.isInfinite} method; the
 264      * functionality is included in this class for convenience.
 265      *
 266      * @param   d   the value to be tested.
 267      * @return  <code>true</code> if the value of the argument is positive
 268      *          infinity or negative infinity; <code>false</code> otherwise.
 269      */
 270     public static boolean isInfinite(double d) {
 271         return Double.isInfinite(d);
 272     }
 273 
 274     /**
 275      * Returns <code>true</code> if the specified number is infinitely
 276      * large in magnitude, <code>false</code> otherwise.
 277      *
 278      * <p>Note that this method is equivalent to the {@link
 279      * Float#isInfinite(float) Float.isInfinite} method; the
 280      * functionality is included in this class for convenience.
 281      *
 282      * @param   f   the value to be tested.
 283      * @return  <code>true</code> if the argument is positive infinity or
 284      *          negative infinity; <code>false</code> otherwise.
 285      */
 286      public static boolean isInfinite(float f) {
 287          return Float.isInfinite(f);
 288     }
 289 
 290     /**
 291      * Returns <code>true</code> if the specified number is a
 292      * Not-a-Number (NaN) value, <code>false</code> otherwise.
 293      *
 294      * <p>Note that this method is equivalent to the {@link
 295      * Double#isNaN(double) Double.isNaN} method; the functionality is
 296      * included in this class for convenience.
 297      *
 298      * @param   d   the value to be tested.
 299      * @return  <code>true</code> if the value of the argument is NaN;
 300      *          <code>false</code> otherwise.
 301      */
 302     public static boolean isNaN(double d) {
 303         return Double.isNaN(d);
 304     }
 305 
 306     /**
 307      * Returns <code>true</code> if the specified number is a
 308      * Not-a-Number (NaN) value, <code>false</code> otherwise.
 309      *
 310      * <p>Note that this method is equivalent to the {@link
 311      * Float#isNaN(float) Float.isNaN} method; the functionality is
 312      * included in this class for convenience.
 313      *
 314      * @param   f   the value to be tested.
 315      * @return  <code>true</code> if the argument is NaN;
 316      *          <code>false</code> otherwise.
 317      */
 318      public static boolean isNaN(float f) {
 319         return Float.isNaN(f);
 320     }
 321 
 322     /**
 323      * Returns <code>true</code> if the unordered relation holds
 324      * between the two arguments.  When two floating-point values are
 325      * unordered, one value is neither less than, equal to, nor
 326      * greater than the other.  For the unordered relation to be true,
 327      * at least one argument must be a <code>NaN</code>.
 328      *
 329      * @param arg1      the first argument
 330      * @param arg2      the second argument
 331      * @return <code>true</code> if at least one argument is a NaN,
 332      * <code>false</code> otherwise.
 333      */
 334     public static boolean isUnordered(double arg1, double arg2) {
 335         return isNaN(arg1) || isNaN(arg2);
 336     }
 337 
 338     /**
 339      * Returns <code>true</code> if the unordered relation holds
 340      * between the two arguments.  When two floating-point values are
 341      * unordered, one value is neither less than, equal to, nor
 342      * greater than the other.  For the unordered relation to be true,
 343      * at least one argument must be a <code>NaN</code>.
 344      *
 345      * @param arg1      the first argument
 346      * @param arg2      the second argument
 347      * @return <code>true</code> if at least one argument is a NaN,
 348      * <code>false</code> otherwise.
 349      */
 350      public static boolean isUnordered(float arg1, float arg2) {
 351         return isNaN(arg1) || isNaN(arg2);
 352     }
 353 
 354     /**
 355      * Returns unbiased exponent of a <code>double</code>; for
 356      * subnormal values, the number is treated as if it were
 357      * normalized.  That is for all finite, non-zero, positive numbers
 358      * <i>x</i>, <code>scalb(<i>x</i>, -ilogb(<i>x</i>))</code> is
 359      * always in the range [1, 2).
 360      * <p>
 361      * Special cases:
 362      * <ul>
 363      * <li> If the argument is NaN, then the result is 2<sup>30</sup>.
 364      * <li> If the argument is infinite, then the result is 2<sup>28</sup>.
 365      * <li> If the argument is zero, then the result is -(2<sup>28</sup>).
 366      * </ul>
 367      *
 368      * @param d floating-point number whose exponent is to be extracted
 369      * @return unbiased exponent of the argument.
 370      * @author Joseph D. Darcy
 371      */
 372     public static int ilogb(double d) {
 373         int exponent = getExponent(d);
 374 
 375         switch (exponent) {
 376         case DoubleConsts.MAX_EXPONENT+1:       // NaN or infinity
 377             if( isNaN(d) )
 378                 return (1<<30);         // 2^30
 379             else // infinite value
 380                 return (1<<28);         // 2^28
 381         // break;
 382 
 383         case DoubleConsts.MIN_EXPONENT-1:       // zero or subnormal
 384             if(d == 0.0) {
 385                 return -(1<<28);        // -(2^28)
 386             }
 387             else {
 388                 long transducer = Double.doubleToRawLongBits(d);
 389 
 390                 /*
 391                  * To avoid causing slow arithmetic on subnormals,
 392                  * the scaling to determine when d's significand
 393                  * is normalized is done in integer arithmetic.
 394                  * (there must be at least one "1" bit in the
 395                  * significand since zero has been screened out.
 396                  */
 397 
 398                 // isolate significand bits
 399                 transducer &= DoubleConsts.SIGNIF_BIT_MASK;
 400                 assert(transducer != 0L);
 401 
 402                 // This loop is simple and functional. We might be
 403                 // able to do something more clever that was faster;
 404                 // e.g. number of leading zero detection on
 405                 // (transducer << (# exponent and sign bits).
 406                 while (transducer <
 407                        (1L << (DoubleConsts.SIGNIFICAND_WIDTH - 1))) {
 408                     transducer *= 2;
 409                     exponent--;
 410                 }
 411                 exponent++;
 412                 assert( exponent >=
 413                         DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1) &&
 414                         exponent < DoubleConsts.MIN_EXPONENT);
 415                 return exponent;
 416             }
 417         // break;
 418 
 419         default:
 420             assert( exponent >= DoubleConsts.MIN_EXPONENT &&
 421                     exponent <= DoubleConsts.MAX_EXPONENT);
 422             return exponent;
 423         // break;
 424         }
 425     }
 426 
 427     /**
 428      * Returns unbiased exponent of a <code>float</code>; for
 429      * subnormal values, the number is treated as if it were
 430      * normalized.  That is for all finite, non-zero, positive numbers
 431      * <i>x</i>, <code>scalb(<i>x</i>, -ilogb(<i>x</i>))</code> is
 432      * always in the range [1, 2).
 433      * <p>
 434      * Special cases:
 435      * <ul>
 436      * <li> If the argument is NaN, then the result is 2<sup>30</sup>.
 437      * <li> If the argument is infinite, then the result is 2<sup>28</sup>.
 438      * <li> If the argument is zero, then the result is -(2<sup>28</sup>).
 439      * </ul>
 440      *
 441      * @param f floating-point number whose exponent is to be extracted
 442      * @return unbiased exponent of the argument.
 443      * @author Joseph D. Darcy
 444      */
 445      public static int ilogb(float f) {
 446         int exponent = getExponent(f);
 447 
 448         switch (exponent) {
 449         case FloatConsts.MAX_EXPONENT+1:        // NaN or infinity
 450             if( isNaN(f) )
 451                 return (1<<30);         // 2^30
 452             else // infinite value
 453                 return (1<<28);         // 2^28
 454         // break;
 455 
 456         case FloatConsts.MIN_EXPONENT-1:        // zero or subnormal
 457             if(f == 0.0f) {
 458                 return -(1<<28);        // -(2^28)
 459             }
 460             else {
 461                 int transducer = Float.floatToRawIntBits(f);
 462 
 463                 /*
 464                  * To avoid causing slow arithmetic on subnormals,
 465                  * the scaling to determine when f's significand
 466                  * is normalized is done in integer arithmetic.
 467                  * (there must be at least one "1" bit in the
 468                  * significand since zero has been screened out.
 469                  */
 470 
 471                 // isolate significand bits
 472                 transducer &= FloatConsts.SIGNIF_BIT_MASK;
 473                 assert(transducer != 0);
 474 
 475                 // This loop is simple and functional. We might be
 476                 // able to do something more clever that was faster;
 477                 // e.g. number of leading zero detection on
 478                 // (transducer << (# exponent and sign bits).
 479                 while (transducer <
 480                        (1 << (FloatConsts.SIGNIFICAND_WIDTH - 1))) {
 481                     transducer *= 2;
 482                     exponent--;
 483                 }
 484                 exponent++;
 485                 assert( exponent >=
 486                         FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1) &&
 487                         exponent < FloatConsts.MIN_EXPONENT);
 488                 return exponent;
 489             }
 490         // break;
 491 
 492         default:
 493             assert( exponent >= FloatConsts.MIN_EXPONENT &&
 494                     exponent <= FloatConsts.MAX_EXPONENT);
 495             return exponent;
 496         // break;
 497         }
 498     }
 499 
 500 
 501     /*
 502      * The scalb operation should be reasonably fast; however, there
 503      * are tradeoffs in writing a method to minimize the worst case
 504      * performance and writing a method to minimize the time for
 505      * expected common inputs.  Some processors operate very slowly on
 506      * subnormal operands, taking hundreds or thousands of cycles for
 507      * one floating-point add or multiply as opposed to, say, four
 508      * cycles for normal operands.  For processors with very slow
 509      * subnormal execution, scalb would be fastest if written entirely
 510      * with integer operations; in other words, scalb would need to
 511      * include the logic of performing correct rounding of subnormal
 512      * values.  This could be reasonably done in at most a few hundred
 513      * cycles.  However, this approach may penalize normal operations
 514      * since at least the exponent of the floating-point argument must
 515      * be examined.
 516      *
 517      * The approach taken in this implementation is a compromise.
 518      * Floating-point multiplication is used to do most of the work;
 519      * but knowingly multiplying by a subnormal scaling factor is
 520      * avoided.  However, the floating-point argument is not examined
 521      * to see whether or not it is subnormal since subnormal inputs
 522      * are assumed to be rare.  At most three multiplies are needed to
 523      * scale from the largest to smallest exponent ranges (scaling
 524      * down, at most two multiplies are needed if subnormal scaling
 525      * factors are allowed).  However, in this implementation an
 526      * expensive integer remainder operation is avoided at the cost of
 527      * requiring five floating-point multiplies in the worst case,
 528      * which should still be a performance win.
 529      *
 530      * If scaling of entire arrays is a concern, it would probably be
 531      * more efficient to provide a double[] scalb(double[], int)
 532      * version of scalb to avoid having to recompute the needed
 533      * scaling factors for each floating-point value.
 534      */
 535 
 536     /**
 537      * Return <code>d</code> &times;
 538      * 2<sup><code>scale_factor</code></sup> rounded as if performed
 539      * by a single correctly rounded floating-point multiply to a
 540      * member of the double value set.  See <a
 541      * href="http://java.sun.com/docs/books/jls/second_edition/html/typesValues.doc.html#9208">&sect;4.2.3</a>
 542      * of the <a href="http://java.sun.com/docs/books/jls/html/">Java
 543      * Language Specification</a> for a discussion of floating-point
 544      * value sets.  If the exponent of the result is between the
 545      * <code>double</code>'s minimum exponent and maximum exponent,
 546      * the answer is calculated exactly.  If the exponent of the
 547      * result would be larger than <code>doubles</code>'s maximum
 548      * exponent, an infinity is returned.  Note that if the result is
 549      * subnormal, precision may be lost; that is, when <code>scalb(x,
 550      * n)</code> is subnormal, <code>scalb(scalb(x, n), -n)</code> may
 551      * not equal <i>x</i>.  When the result is non-NaN, the result has
 552      * the same sign as <code>d</code>.
 553      *
 554      *<p>
 555      * Special cases:
 556      * <ul>
 557      * <li> If the first argument is NaN, NaN is returned.
 558      * <li> If the first argument is infinite, then an infinity of the
 559      * same sign is returned.
 560      * <li> If the first argument is zero, then a zero of the same
 561      * sign is returned.
 562      * </ul>
 563      *
 564      * @param d number to be scaled by a power of two.
 565      * @param scale_factor power of 2 used to scale <code>d</code>
 566      * @return <code>d * </code>2<sup><code>scale_factor</code></sup>
 567      * @author Joseph D. Darcy
 568      */
 569     public static double scalb(double d, int scale_factor) {
 570         /*
 571          * This method does not need to be declared strictfp to
 572          * compute the same correct result on all platforms.  When
 573          * scaling up, it does not matter what order the
 574          * multiply-store operations are done; the result will be
 575          * finite or overflow regardless of the operation ordering.
 576          * However, to get the correct result when scaling down, a
 577          * particular ordering must be used.
 578          *
 579          * When scaling down, the multiply-store operations are
 580          * sequenced so that it is not possible for two consecutive
 581          * multiply-stores to return subnormal results.  If one
 582          * multiply-store result is subnormal, the next multiply will
 583          * round it away to zero.  This is done by first multiplying
 584          * by 2 ^ (scale_factor % n) and then multiplying several
 585          * times by by 2^n as needed where n is the exponent of number
 586          * that is a covenient power of two.  In this way, at most one


 627             scale_increment = 512;
 628             exp_delta = twoToTheDoubleScaleUp;
 629         }
 630 
 631         // Calculate (scale_factor % +/-512), 512 = 2^9, using
 632         // technique from "Hacker's Delight" section 10-2.
 633         int t = (scale_factor >> 9-1) >>> 32 - 9;
 634         exp_adjust = ((scale_factor + t) & (512 -1)) - t;
 635 
 636         d *= powerOfTwoD(exp_adjust);
 637         scale_factor -= exp_adjust;
 638 
 639         while(scale_factor != 0) {
 640             d *= exp_delta;
 641             scale_factor -= scale_increment;
 642         }
 643         return d;
 644     }
 645 
 646     /**
 647      * Return <code>f </code>&times;
 648      * 2<sup><code>scale_factor</code></sup> rounded as if performed
 649      * by a single correctly rounded floating-point multiply to a
 650      * member of the float value set.  See <a
 651      * href="http://java.sun.com/docs/books/jls/second_edition/html/typesValues.doc.html#9208">&sect;4.2.3</a>
 652      * of the <a href="http://java.sun.com/docs/books/jls/html/">Java
 653      * Language Specification</a> for a discussion of floating-point
 654      * value set. If the exponent of the result is between the
 655      * <code>float</code>'s minimum exponent and maximum exponent, the
 656      * answer is calculated exactly.  If the exponent of the result
 657      * would be larger than <code>float</code>'s maximum exponent, an
 658      * infinity is returned.  Note that if the result is subnormal,
 659      * precision may be lost; that is, when <code>scalb(x, n)</code>
 660      * is subnormal, <code>scalb(scalb(x, n), -n)</code> may not equal
 661      * <i>x</i>.  When the result is non-NaN, the result has the same
 662      * sign as <code>f</code>.
 663      *
 664      *<p>
 665      * Special cases:
 666      * <ul>
 667      * <li> If the first argument is NaN, NaN is returned.
 668      * <li> If the first argument is infinite, then an infinity of the
 669      * same sign is returned.
 670      * <li> If the first argument is zero, then a zero of the same
 671      * sign is returned.
 672      * </ul>
 673      *
 674      * @param f number to be scaled by a power of two.
 675      * @param scale_factor power of 2 used to scale <code>f</code>
 676      * @return <code>f * </code>2<sup><code>scale_factor</code></sup>
 677      * @author Joseph D. Darcy
 678      */
 679      public static float scalb(float f, int scale_factor) {
 680         // magnitude of a power of two so large that scaling a finite
 681         // nonzero value by it would be guaranteed to over or
 682         // underflow; due to rounding, scaling down takes takes an
 683         // additional power of two which is reflected here
 684         final int MAX_SCALE = FloatConsts.MAX_EXPONENT + -FloatConsts.MIN_EXPONENT +
 685                               FloatConsts.SIGNIFICAND_WIDTH + 1;
 686 
 687         // Make sure scaling factor is in a reasonable range
 688         scale_factor = Math.max(Math.min(scale_factor, MAX_SCALE), -MAX_SCALE);
 689 
 690         /*
 691          * Since + MAX_SCALE for float fits well within the double
 692          * exponent range and + float -> double conversion is exact
 693          * the multiplication below will be exact. Therefore, the
 694          * rounding that occurs when the double product is cast to
 695          * float will be the correctly rounded float result.  Since
 696          * all operations other than the final multiply will be exact,
 697          * it is not necessary to declare this method strictfp.
 698          */
 699         return (float)((double)f*powerOfTwoD(scale_factor));
 700     }
 701 
 702     /**
 703      * Returns the floating-point number adjacent to the first
 704      * argument in the direction of the second argument.  If both
 705      * arguments compare as equal the second argument is returned.
 706      *
 707      * <p>
 708      * Special cases:
 709      * <ul>
 710      * <li> If either argument is a NaN, then NaN is returned.
 711      *
 712      * <li> If both arguments are signed zeros, <code>direction</code>
 713      * is returned unchanged (as implied by the requirement of
 714      * returning the second argument if the arguments compare as
 715      * equal).
 716      *
 717      * <li> If <code>start</code> is
 718      * &plusmn;<code>Double.MIN_VALUE</code> and <code>direction</code>
 719      * has a value such that the result should have a smaller
 720      * magnitude, then a zero with the same sign as <code>start</code>
 721      * is returned.
 722      *
 723      * <li> If <code>start</code> is infinite and
 724      * <code>direction</code> has a value such that the result should
 725      * have a smaller magnitude, <code>Double.MAX_VALUE</code> with the
 726      * same sign as <code>start</code> is returned.
 727      *
 728      * <li> If <code>start</code> is equal to &plusmn;
 729      * <code>Double.MAX_VALUE</code> and <code>direction</code> has a
 730      * value such that the result should have a larger magnitude, an
 731      * infinity with same sign as <code>start</code> is returned.
 732      * </ul>
 733      *
 734      * @param start     starting floating-point value
 735      * @param direction value indicating which of
 736      * <code>start</code>'s neighbors or <code>start</code> should
 737      * be returned
 738      * @return The floating-point number adjacent to <code>start</code> in the
 739      * direction of <code>direction</code>.
 740      * @author Joseph D. Darcy
 741      */
 742     public static double nextAfter(double start, double direction) {
 743         /*
 744          * The cases:
 745          *
 746          * nextAfter(+infinity, 0)  == MAX_VALUE
 747          * nextAfter(+infinity, +infinity)  == +infinity
 748          * nextAfter(-infinity, 0)  == -MAX_VALUE
 749          * nextAfter(-infinity, -infinity)  == -infinity
 750          *
 751          * are naturally handled without any additional testing
 752          */
 753 
 754         // First check for NaN values
 755         if (isNaN(start) || isNaN(direction)) {
 756             // return a NaN derived from the input NaN(s)
 757             return start + direction;
 758         } else if (start == direction) {
 759             return direction;


 792                      * explicitly.
 793                      */
 794                     else
 795                         transducer = DoubleConsts.SIGN_BIT_MASK | 1L;
 796             }
 797 
 798             return Double.longBitsToDouble(transducer);
 799         }
 800     }
 801 
 802     /**
 803      * Returns the floating-point number adjacent to the first
 804      * argument in the direction of the second argument.  If both
 805      * arguments compare as equal, the second argument is returned.
 806      *
 807      * <p>
 808      * Special cases:
 809      * <ul>
 810      * <li> If either argument is a NaN, then NaN is returned.
 811      *
 812      * <li> If both arguments are signed zeros, a <code>float</code>
 813      * zero with the same sign as <code>direction</code> is returned
 814      * (as implied by the requirement of returning the second argument
 815      * if the arguments compare as equal).
 816      *
 817      * <li> If <code>start</code> is
 818      * &plusmn;<code>Float.MIN_VALUE</code> and <code>direction</code>
 819      * has a value such that the result should have a smaller
 820      * magnitude, then a zero with the same sign as <code>start</code>
 821      * is returned.
 822      *
 823      * <li> If <code>start</code> is infinite and
 824      * <code>direction</code> has a value such that the result should
 825      * have a smaller magnitude, <code>Float.MAX_VALUE</code> with the
 826      * same sign as <code>start</code> is returned.
 827      *
 828      * <li> If <code>start</code> is equal to &plusmn;
 829      * <code>Float.MAX_VALUE</code> and <code>direction</code> has a
 830      * value such that the result should have a larger magnitude, an
 831      * infinity with same sign as <code>start</code> is returned.
 832      * </ul>
 833      *
 834      * @param start     starting floating-point value
 835      * @param direction value indicating which of
 836      * <code>start</code>'s neighbors or <code>start</code> should
 837      * be returned
 838      * @return The floating-point number adjacent to <code>start</code> in the
 839      * direction of <code>direction</code>.
 840      * @author Joseph D. Darcy
 841      */
 842      public static float nextAfter(float start, double direction) {
 843         /*
 844          * The cases:
 845          *
 846          * nextAfter(+infinity, 0)  == MAX_VALUE
 847          * nextAfter(+infinity, +infinity)  == +infinity
 848          * nextAfter(-infinity, 0)  == -MAX_VALUE
 849          * nextAfter(-infinity, -infinity)  == -infinity
 850          *
 851          * are naturally handled without any additional testing
 852          */
 853 
 854         // First check for NaN values
 855         if (isNaN(start) || isNaN(direction)) {
 856             // return a NaN derived from the input NaN(s)
 857             return start + (float)direction;
 858         } else if (start == direction) {
 859             return (float)direction;


 883                 else
 884                     if (transducer < 0 )
 885                         ++transducer;
 886                     /*
 887                      * transducer==0, the result is -MIN_VALUE
 888                      *
 889                      * The transition from zero (implicitly
 890                      * positive) to the smallest negative
 891                      * signed magnitude value must be done
 892                      * explicitly.
 893                      */
 894                     else
 895                         transducer = FloatConsts.SIGN_BIT_MASK | 1;
 896             }
 897 
 898             return Float.intBitsToFloat(transducer);
 899         }
 900     }
 901 
 902     /**
 903      * Returns the floating-point value adjacent to <code>d</code> in
 904      * the direction of positive infinity.  This method is
 905      * semantically equivalent to <code>nextAfter(d,
 906      * Double.POSITIVE_INFINITY)</code>; however, a <code>nextUp</code>
 907      * implementation may run faster than its equivalent
 908      * <code>nextAfter</code> call.
 909      *
 910      * <p>Special Cases:
 911      * <ul>
 912      * <li> If the argument is NaN, the result is NaN.
 913      *
 914      * <li> If the argument is positive infinity, the result is
 915      * positive infinity.
 916      *
 917      * <li> If the argument is zero, the result is
 918      * <code>Double.MIN_VALUE</code>
 919      *
 920      * </ul>
 921      *
 922      * @param d  starting floating-point value
 923      * @return The adjacent floating-point value closer to positive
 924      * infinity.
 925      * @author Joseph D. Darcy
 926      */
 927     public static double nextUp(double d) {
 928         if( isNaN(d) || d == Double.POSITIVE_INFINITY)
 929             return d;
 930         else {
 931             d += 0.0d;
 932             return Double.longBitsToDouble(Double.doubleToRawLongBits(d) +
 933                                            ((d >= 0.0d)?+1L:-1L));
 934         }
 935     }
 936 
 937     /**
 938      * Returns the floating-point value adjacent to <code>f</code> in
 939      * the direction of positive infinity.  This method is
 940      * semantically equivalent to <code>nextAfter(f,
 941      * Double.POSITIVE_INFINITY)</code>; however, a <code>nextUp</code>
 942      * implementation may run faster than its equivalent
 943      * <code>nextAfter</code> call.
 944      *
 945      * <p>Special Cases:
 946      * <ul>
 947      * <li> If the argument is NaN, the result is NaN.
 948      *
 949      * <li> If the argument is positive infinity, the result is
 950      * positive infinity.
 951      *
 952      * <li> If the argument is zero, the result is
 953      * <code>Float.MIN_VALUE</code>
 954      *
 955      * </ul>
 956      *
 957      * @param f  starting floating-point value
 958      * @return The adjacent floating-point value closer to positive
 959      * infinity.
 960      * @author Joseph D. Darcy
 961      */
 962      public static float nextUp(float f) {
 963         if( isNaN(f) || f == FloatConsts.POSITIVE_INFINITY)
 964             return f;
 965         else {
 966             f += 0.0f;
 967             return Float.intBitsToFloat(Float.floatToRawIntBits(f) +
 968                                         ((f >= 0.0f)?+1:-1));
 969         }
 970     }
 971 
 972     /**
 973      * Returns the floating-point value adjacent to <code>d</code> in
 974      * the direction of negative infinity.  This method is
 975      * semantically equivalent to <code>nextAfter(d,
 976      * Double.NEGATIVE_INFINITY)</code>; however, a
 977      * <code>nextDown</code> implementation may run faster than its
 978      * equivalent <code>nextAfter</code> call.
 979      *
 980      * <p>Special Cases:
 981      * <ul>
 982      * <li> If the argument is NaN, the result is NaN.
 983      *
 984      * <li> If the argument is negative infinity, the result is
 985      * negative infinity.
 986      *
 987      * <li> If the argument is zero, the result is
 988      * <code>-Double.MIN_VALUE</code>
 989      *
 990      * </ul>
 991      *
 992      * @param d  starting floating-point value
 993      * @return The adjacent floating-point value closer to negative
 994      * infinity.
 995      * @author Joseph D. Darcy
 996      */
 997     public static double nextDown(double d) {
 998         if( isNaN(d) || d == Double.NEGATIVE_INFINITY)
 999             return d;
1000         else {
1001             if (d == 0.0)
1002                 return -Double.MIN_VALUE;
1003             else
1004                 return Double.longBitsToDouble(Double.doubleToRawLongBits(d) +
1005                                                ((d > 0.0d)?-1L:+1L));
1006         }
1007     }
1008 
1009     /**
1010      * Returns the floating-point value adjacent to <code>f</code> in
1011      * the direction of negative infinity.  This method is
1012      * semantically equivalent to <code>nextAfter(f,
1013      * Float.NEGATIVE_INFINITY)</code>; however, a
1014      * <code>nextDown</code> implementation may run faster than its
1015      * equivalent <code>nextAfter</code> call.
1016      *
1017      * <p>Special Cases:
1018      * <ul>
1019      * <li> If the argument is NaN, the result is NaN.
1020      *
1021      * <li> If the argument is negative infinity, the result is
1022      * negative infinity.
1023      *
1024      * <li> If the argument is zero, the result is
1025      * <code>-Float.MIN_VALUE</code>
1026      *
1027      * </ul>
1028      *
1029      * @param f  starting floating-point value
1030      * @return The adjacent floating-point value closer to negative
1031      * infinity.
1032      * @author Joseph D. Darcy
1033      */
1034     public static double nextDown(float f) {
1035         if( isNaN(f) || f == Float.NEGATIVE_INFINITY)
1036             return f;
1037         else {
1038             if (f == 0.0f)
1039                 return -Float.MIN_VALUE;
1040             else
1041                 return Float.intBitsToFloat(Float.floatToRawIntBits(f) +
1042                                             ((f > 0.0f)?-1:+1));
1043         }
1044     }
1045 
1046     /**
1047      * Returns the first floating-point argument with the sign of the
1048      * second floating-point argument.  For this method, a NaN
1049      * <code>sign</code> argument is always treated as if it were
1050      * positive.
1051      *
1052      * @param magnitude  the parameter providing the magnitude of the result
1053      * @param sign   the parameter providing the sign of the result
1054      * @return a value with the magnitude of <code>magnitude</code>
1055      * and the sign of <code>sign</code>.
1056      * @author Joseph D. Darcy
1057      * @since 1.5
1058      */
1059     public static double copySign(double magnitude, double sign) {
1060         return rawCopySign(magnitude, (isNaN(sign)?1.0d:sign));
1061     }
1062 
1063     /**
1064      * Returns the first floating-point argument with the sign of the
1065      * second floating-point argument.  For this method, a NaN
1066      * <code>sign</code> argument is always treated as if it were
1067      * positive.
1068      *
1069      * @param magnitude  the parameter providing the magnitude of the result
1070      * @param sign   the parameter providing the sign of the result
1071      * @return a value with the magnitude of <code>magnitude</code>
1072      * and the sign of <code>sign</code>.
1073      * @author Joseph D. Darcy
1074      */
1075      public static float copySign(float magnitude, float sign) {
1076         return rawCopySign(magnitude, (isNaN(sign)?1.0f:sign));
1077     }
1078 
1079     /**
1080      * Returns the size of an ulp of the argument.  An ulp of a
1081      * <code>double</code> value is the positive distance between this
1082      * floating-point value and the <code>double</code> value next
1083      * larger in magnitude.  Note that for non-NaN <i>x</i>,
1084      * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>.
1085      *
1086      * <p>Special Cases:
1087      * <ul>
1088      * <li> If the argument is NaN, then the result is NaN.
1089      * <li> If the argument is positive or negative infinity, then the
1090      * result is positive infinity.
1091      * <li> If the argument is positive or negative zero, then the result is
1092      * <code>Double.MIN_VALUE</code>.
1093      * <li> If the argument is &plusmn;<code>Double.MAX_VALUE</code>, then
1094      * the result is equal to 2<sup>971</sup>.
1095      * </ul>
1096      *
1097      * @param d the floating-point value whose ulp is to be returned
1098      * @return the size of an ulp of the argument
1099      * @author Joseph D. Darcy
1100      * @since 1.5
1101      */
1102     public static double ulp(double d) {
1103         int exp = getExponent(d);
1104 
1105         switch(exp) {
1106         case DoubleConsts.MAX_EXPONENT+1:       // NaN or infinity
1107             return Math.abs(d);
1108             // break;
1109 
1110         case DoubleConsts.MIN_EXPONENT-1:       // zero or subnormal
1111             return Double.MIN_VALUE;
1112             // break
1113 
1114         default:
1115             assert exp <= DoubleConsts.MAX_EXPONENT && exp >= DoubleConsts.MIN_EXPONENT;
1116 
1117             // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
1118             exp = exp - (DoubleConsts.SIGNIFICAND_WIDTH-1);
1119             if (exp >= DoubleConsts.MIN_EXPONENT) {
1120                 return powerOfTwoD(exp);
1121             }
1122             else {
1123                 // return a subnormal result; left shift integer
1124                 // representation of Double.MIN_VALUE appropriate
1125                 // number of positions
1126                 return Double.longBitsToDouble(1L <<
1127                 (exp - (DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1)) ));
1128             }
1129             // break
1130         }
1131     }
1132 
1133     /**
1134      * Returns the size of an ulp of the argument.  An ulp of a
1135      * <code>float</code> value is the positive distance between this
1136      * floating-point value and the <code>float</code> value next
1137      * larger in magnitude.  Note that for non-NaN <i>x</i>,
1138      * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>.
1139      *
1140      * <p>Special Cases:
1141      * <ul>
1142      * <li> If the argument is NaN, then the result is NaN.
1143      * <li> If the argument is positive or negative infinity, then the
1144      * result is positive infinity.
1145      * <li> If the argument is positive or negative zero, then the result is
1146      * <code>Float.MIN_VALUE</code>.
1147      * <li> If the argument is &plusmn;<code>Float.MAX_VALUE</code>, then
1148      * the result is equal to 2<sup>104</sup>.
1149      * </ul>
1150      *
1151      * @param f the floating-point value whose ulp is to be returned
1152      * @return the size of an ulp of the argument
1153      * @author Joseph D. Darcy
1154      * @since 1.5
1155      */
1156      public static float ulp(float f) {
1157         int exp = getExponent(f);
1158 
1159         switch(exp) {
1160         case FloatConsts.MAX_EXPONENT+1:        // NaN or infinity
1161             return Math.abs(f);
1162             // break;
1163 
1164         case FloatConsts.MIN_EXPONENT-1:        // zero or subnormal
1165             return FloatConsts.MIN_VALUE;
1166             // break
1167 
1168         default:
1169             assert exp <= FloatConsts.MAX_EXPONENT && exp >= FloatConsts.MIN_EXPONENT;
1170 
1171             // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
1172             exp = exp - (FloatConsts.SIGNIFICAND_WIDTH-1);
1173             if (exp >= FloatConsts.MIN_EXPONENT) {
1174                 return powerOfTwoF(exp);
1175             }
1176             else {
1177                 // return a subnormal result; left shift integer
1178                 // representation of FloatConsts.MIN_VALUE appropriate
1179                 // number of positions
1180                 return Float.intBitsToFloat(1 <<
1181                 (exp - (FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1)) ));
1182             }
1183             // break
1184         }
1185      }
1186 
1187     /**
1188      * Returns the signum function of the argument; zero if the argument
1189      * is zero, 1.0 if the argument is greater than zero, -1.0 if the
1190      * argument is less than zero.
1191      *
1192      * <p>Special Cases:
1193      * <ul>
1194      * <li> If the argument is NaN, then the result is NaN.
1195      * <li> If the argument is positive zero or negative zero, then the
1196      *      result is the same as the argument.
1197      * </ul>
1198      *
1199      * @param d the floating-point value whose signum is to be returned
1200      * @return the signum function of the argument
1201      * @author Joseph D. Darcy
1202      * @since 1.5
1203      */


   1 /*
   2  * Copyright (c) 2003, 2010 Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.misc;
  27 
  28 import sun.misc.FloatConsts;
  29 import sun.misc.DoubleConsts;
  30 
  31 /**
  32  * The class {@code FpUtils} contains static utility methods for
  33  * manipulating and inspecting {@code float} and
  34  * {@code double} floating-point numbers.  These methods include
  35  * functionality recommended or required by the IEEE 754
  36  * floating-point standard.
  37  *
  38  * @author Joseph D. Darcy
  39  */
  40 
  41 public class FpUtils {
  42     /*
  43      * The methods in this class are reasonably implemented using
  44      * direct or indirect bit-level manipulation of floating-point
  45      * values.  However, having access to the IEEE 754 recommended
  46      * functions would obviate the need for most programmers to engage
  47      * in floating-point bit-twiddling.
  48      *
  49      * An IEEE 754 number has three fields, from most significant bit
  50      * to to least significant, sign, exponent, and significand.
  51      *
  52      *  msb                                lsb
  53      * [sign|exponent|  fractional_significand]
  54      *


 119      * ``100% Pure Java''(TM)," University of California, Berkeley
 120      * technical report UCB//CSD-98-1009.
 121      */
 122 
 123     /**
 124      * Don't let anyone instantiate this class.
 125      */
 126     private FpUtils() {}
 127 
 128     // Constants used in scalb
 129     static double twoToTheDoubleScaleUp = powerOfTwoD(512);
 130     static double twoToTheDoubleScaleDown = powerOfTwoD(-512);
 131 
 132     // Helper Methods
 133 
 134     // The following helper methods are used in the implementation of
 135     // the public recommended functions; they generally omit certain
 136     // tests for exception cases.
 137 
 138     /**
 139      * Returns unbiased exponent of a {@code double}.
 140      */
 141     public static int getExponent(double d){
 142         /*
 143          * Bitwise convert d to long, mask out exponent bits, shift
 144          * to the right and then subtract out double's bias adjust to
 145          * get true exponent value.
 146          */
 147         return (int)(((Double.doubleToRawLongBits(d) & DoubleConsts.EXP_BIT_MASK) >>
 148                       (DoubleConsts.SIGNIFICAND_WIDTH - 1)) - DoubleConsts.EXP_BIAS);
 149     }
 150 
 151     /**
 152      * Returns unbiased exponent of a {@code float}.
 153      */
 154     public static int getExponent(float f){
 155         /*
 156          * Bitwise convert f to integer, mask out exponent bits, shift
 157          * to the right and then subtract out float's bias adjust to
 158          * get true exponent value
 159          */
 160         return ((Float.floatToRawIntBits(f) & FloatConsts.EXP_BIT_MASK) >>
 161                 (FloatConsts.SIGNIFICAND_WIDTH - 1)) - FloatConsts.EXP_BIAS;
 162     }
 163 
 164     /**
 165      * Returns a floating-point power of two in the normal range.
 166      */
 167     static double powerOfTwoD(int n) {
 168         assert(n >= DoubleConsts.MIN_EXPONENT && n <= DoubleConsts.MAX_EXPONENT);
 169         return Double.longBitsToDouble((((long)n + (long)DoubleConsts.EXP_BIAS) <<
 170                                         (DoubleConsts.SIGNIFICAND_WIDTH-1))
 171                                        & DoubleConsts.EXP_BIT_MASK);
 172     }
 173 
 174     /**
 175      * Returns a floating-point power of two in the normal range.
 176      */
 177     static float powerOfTwoF(int n) {
 178         assert(n >= FloatConsts.MIN_EXPONENT && n <= FloatConsts.MAX_EXPONENT);
 179         return Float.intBitsToFloat(((n + FloatConsts.EXP_BIAS) <<
 180                                      (FloatConsts.SIGNIFICAND_WIDTH-1))
 181                                     & FloatConsts.EXP_BIT_MASK);
 182     }
 183 
 184     /**
 185      * Returns the first floating-point argument with the sign of the
 186      * second floating-point argument.  Note that unlike the {@link
 187      * FpUtils#copySign(double, double) copySign} method, this method
 188      * does not require NaN {@code sign} arguments to be treated
 189      * as positive values; implementations are permitted to treat some
 190      * NaN arguments as positive and other NaN arguments as negative
 191      * to allow greater performance.
 192      *
 193      * @param magnitude  the parameter providing the magnitude of the result
 194      * @param sign   the parameter providing the sign of the result
 195      * @return a value with the magnitude of {@code magnitude}
 196      * and the sign of {@code sign}.
 197      * @author Joseph D. Darcy
 198      */
 199     public static double rawCopySign(double magnitude, double sign) {
 200         return Double.longBitsToDouble((Double.doubleToRawLongBits(sign) &
 201                                         (DoubleConsts.SIGN_BIT_MASK)) |
 202                                        (Double.doubleToRawLongBits(magnitude) &
 203                                         (DoubleConsts.EXP_BIT_MASK |
 204                                          DoubleConsts.SIGNIF_BIT_MASK)));
 205     }
 206 
 207     /**
 208      * Returns the first floating-point argument with the sign of the
 209      * second floating-point argument.  Note that unlike the {@link
 210      * FpUtils#copySign(float, float) copySign} method, this method
 211      * does not require NaN {@code sign} arguments to be treated
 212      * as positive values; implementations are permitted to treat some
 213      * NaN arguments as positive and other NaN arguments as negative
 214      * to allow greater performance.
 215      *
 216      * @param magnitude  the parameter providing the magnitude of the result
 217      * @param sign   the parameter providing the sign of the result
 218      * @return a value with the magnitude of {@code magnitude}
 219      * and the sign of {@code sign}.
 220      * @author Joseph D. Darcy
 221      */
 222     public static float rawCopySign(float magnitude, float sign) {
 223         return Float.intBitsToFloat((Float.floatToRawIntBits(sign) &
 224                                      (FloatConsts.SIGN_BIT_MASK)) |
 225                                     (Float.floatToRawIntBits(magnitude) &
 226                                      (FloatConsts.EXP_BIT_MASK |
 227                                       FloatConsts.SIGNIF_BIT_MASK)));
 228     }
 229 
 230     /* ***************************************************************** */
 231 
 232     /**
 233      * Returns {@code true} if the argument is a finite
 234      * floating-point value; returns {@code false} otherwise (for
 235      * NaN and infinity arguments).
 236      *
 237      * @param d the {@code double} value to be tested
 238      * @return {@code true} if the argument is a finite
 239      * floating-point value, {@code false} otherwise.
 240      */
 241     public static boolean isFinite(double d) {
 242         return Math.abs(d) <= DoubleConsts.MAX_VALUE;
 243     }
 244 
 245     /**
 246      * Returns {@code true} if the argument is a finite
 247      * floating-point value; returns {@code false} otherwise (for
 248      * NaN and infinity arguments).
 249      *
 250      * @param f the {@code float} value to be tested
 251      * @return {@code true} if the argument is a finite
 252      * floating-point value, {@code false} otherwise.
 253      */
 254      public static boolean isFinite(float f) {
 255         return Math.abs(f) <= FloatConsts.MAX_VALUE;
 256     }
 257 
 258     /**
 259      * Returns {@code true} if the specified number is infinitely
 260      * large in magnitude, {@code false} otherwise.
 261      *
 262      * <p>Note that this method is equivalent to the {@link
 263      * Double#isInfinite(double) Double.isInfinite} method; the
 264      * functionality is included in this class for convenience.
 265      *
 266      * @param   d   the value to be tested.
 267      * @return  {@code true} if the value of the argument is positive
 268      *          infinity or negative infinity; {@code false} otherwise.
 269      */
 270     public static boolean isInfinite(double d) {
 271         return Double.isInfinite(d);
 272     }
 273 
 274     /**
 275      * Returns {@code true} if the specified number is infinitely
 276      * large in magnitude, {@code false} otherwise.
 277      *
 278      * <p>Note that this method is equivalent to the {@link
 279      * Float#isInfinite(float) Float.isInfinite} method; the
 280      * functionality is included in this class for convenience.
 281      *
 282      * @param   f   the value to be tested.
 283      * @return  {@code true} if the argument is positive infinity or
 284      *          negative infinity; {@code false} otherwise.
 285      */
 286      public static boolean isInfinite(float f) {
 287          return Float.isInfinite(f);
 288     }
 289 
 290     /**
 291      * Returns {@code true} if the specified number is a
 292      * Not-a-Number (NaN) value, {@code false} otherwise.
 293      *
 294      * <p>Note that this method is equivalent to the {@link
 295      * Double#isNaN(double) Double.isNaN} method; the functionality is
 296      * included in this class for convenience.
 297      *
 298      * @param   d   the value to be tested.
 299      * @return  {@code true} if the value of the argument is NaN;
 300      *          {@code false} otherwise.
 301      */
 302     public static boolean isNaN(double d) {
 303         return Double.isNaN(d);
 304     }
 305 
 306     /**
 307      * Returns {@code true} if the specified number is a
 308      * Not-a-Number (NaN) value, {@code false} otherwise.
 309      *
 310      * <p>Note that this method is equivalent to the {@link
 311      * Float#isNaN(float) Float.isNaN} method; the functionality is
 312      * included in this class for convenience.
 313      *
 314      * @param   f   the value to be tested.
 315      * @return  {@code true} if the argument is NaN;
 316      *          {@code false} otherwise.
 317      */
 318      public static boolean isNaN(float f) {
 319         return Float.isNaN(f);
 320     }
 321 
 322     /**
 323      * Returns {@code true} if the unordered relation holds
 324      * between the two arguments.  When two floating-point values are
 325      * unordered, one value is neither less than, equal to, nor
 326      * greater than the other.  For the unordered relation to be true,
 327      * at least one argument must be a {@code NaN}.
 328      *
 329      * @param arg1      the first argument
 330      * @param arg2      the second argument
 331      * @return {@code true} if at least one argument is a NaN,
 332      * {@code false} otherwise.
 333      */
 334     public static boolean isUnordered(double arg1, double arg2) {
 335         return isNaN(arg1) || isNaN(arg2);
 336     }
 337 
 338     /**
 339      * Returns {@code true} if the unordered relation holds
 340      * between the two arguments.  When two floating-point values are
 341      * unordered, one value is neither less than, equal to, nor
 342      * greater than the other.  For the unordered relation to be true,
 343      * at least one argument must be a {@code NaN}.
 344      *
 345      * @param arg1      the first argument
 346      * @param arg2      the second argument
 347      * @return {@code true} if at least one argument is a NaN,
 348      * {@code false} otherwise.
 349      */
 350      public static boolean isUnordered(float arg1, float arg2) {
 351         return isNaN(arg1) || isNaN(arg2);
 352     }
 353 
 354     /**
 355      * Returns unbiased exponent of a {@code double}; for
 356      * subnormal values, the number is treated as if it were
 357      * normalized.  That is for all finite, non-zero, positive numbers
 358      * <i>x</i>, <code>scalb(<i>x</i>, -ilogb(<i>x</i>))</code> is
 359      * always in the range [1, 2).
 360      * <p>
 361      * Special cases:
 362      * <ul>
 363      * <li> If the argument is NaN, then the result is 2<sup>30</sup>.
 364      * <li> If the argument is infinite, then the result is 2<sup>28</sup>.
 365      * <li> If the argument is zero, then the result is -(2<sup>28</sup>).
 366      * </ul>
 367      *
 368      * @param d floating-point number whose exponent is to be extracted
 369      * @return unbiased exponent of the argument.
 370      * @author Joseph D. Darcy
 371      */
 372     public static int ilogb(double d) {
 373         int exponent = getExponent(d);
 374 
 375         switch (exponent) {
 376         case DoubleConsts.MAX_EXPONENT+1:       // NaN or infinity
 377             if( isNaN(d) )
 378                 return (1<<30);         // 2^30
 379             else // infinite value
 380                 return (1<<28);         // 2^28

 381 
 382         case DoubleConsts.MIN_EXPONENT-1:       // zero or subnormal
 383             if(d == 0.0) {
 384                 return -(1<<28);        // -(2^28)
 385             }
 386             else {
 387                 long transducer = Double.doubleToRawLongBits(d);
 388 
 389                 /*
 390                  * To avoid causing slow arithmetic on subnormals,
 391                  * the scaling to determine when d's significand
 392                  * is normalized is done in integer arithmetic.
 393                  * (there must be at least one "1" bit in the
 394                  * significand since zero has been screened out.
 395                  */
 396 
 397                 // isolate significand bits
 398                 transducer &= DoubleConsts.SIGNIF_BIT_MASK;
 399                 assert(transducer != 0L);
 400 
 401                 // This loop is simple and functional. We might be
 402                 // able to do something more clever that was faster;
 403                 // e.g. number of leading zero detection on
 404                 // (transducer << (# exponent and sign bits).
 405                 while (transducer <
 406                        (1L << (DoubleConsts.SIGNIFICAND_WIDTH - 1))) {
 407                     transducer *= 2;
 408                     exponent--;
 409                 }
 410                 exponent++;
 411                 assert( exponent >=
 412                         DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1) &&
 413                         exponent < DoubleConsts.MIN_EXPONENT);
 414                 return exponent;
 415             }

 416 
 417         default:
 418             assert( exponent >= DoubleConsts.MIN_EXPONENT &&
 419                     exponent <= DoubleConsts.MAX_EXPONENT);
 420             return exponent;

 421         }
 422     }
 423 
 424     /**
 425      * Returns unbiased exponent of a {@code float}; for
 426      * subnormal values, the number is treated as if it were
 427      * normalized.  That is for all finite, non-zero, positive numbers
 428      * <i>x</i>, <code>scalb(<i>x</i>, -ilogb(<i>x</i>))</code> is
 429      * always in the range [1, 2).
 430      * <p>
 431      * Special cases:
 432      * <ul>
 433      * <li> If the argument is NaN, then the result is 2<sup>30</sup>.
 434      * <li> If the argument is infinite, then the result is 2<sup>28</sup>.
 435      * <li> If the argument is zero, then the result is -(2<sup>28</sup>).
 436      * </ul>
 437      *
 438      * @param f floating-point number whose exponent is to be extracted
 439      * @return unbiased exponent of the argument.
 440      * @author Joseph D. Darcy
 441      */
 442      public static int ilogb(float f) {
 443         int exponent = getExponent(f);
 444 
 445         switch (exponent) {
 446         case FloatConsts.MAX_EXPONENT+1:        // NaN or infinity
 447             if( isNaN(f) )
 448                 return (1<<30);         // 2^30
 449             else // infinite value
 450                 return (1<<28);         // 2^28

 451 
 452         case FloatConsts.MIN_EXPONENT-1:        // zero or subnormal
 453             if(f == 0.0f) {
 454                 return -(1<<28);        // -(2^28)
 455             }
 456             else {
 457                 int transducer = Float.floatToRawIntBits(f);
 458 
 459                 /*
 460                  * To avoid causing slow arithmetic on subnormals,
 461                  * the scaling to determine when f's significand
 462                  * is normalized is done in integer arithmetic.
 463                  * (there must be at least one "1" bit in the
 464                  * significand since zero has been screened out.
 465                  */
 466 
 467                 // isolate significand bits
 468                 transducer &= FloatConsts.SIGNIF_BIT_MASK;
 469                 assert(transducer != 0);
 470 
 471                 // This loop is simple and functional. We might be
 472                 // able to do something more clever that was faster;
 473                 // e.g. number of leading zero detection on
 474                 // (transducer << (# exponent and sign bits).
 475                 while (transducer <
 476                        (1 << (FloatConsts.SIGNIFICAND_WIDTH - 1))) {
 477                     transducer *= 2;
 478                     exponent--;
 479                 }
 480                 exponent++;
 481                 assert( exponent >=
 482                         FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1) &&
 483                         exponent < FloatConsts.MIN_EXPONENT);
 484                 return exponent;
 485             }

 486 
 487         default:
 488             assert( exponent >= FloatConsts.MIN_EXPONENT &&
 489                     exponent <= FloatConsts.MAX_EXPONENT);
 490             return exponent;

 491         }
 492     }
 493 
 494 
 495     /*
 496      * The scalb operation should be reasonably fast; however, there
 497      * are tradeoffs in writing a method to minimize the worst case
 498      * performance and writing a method to minimize the time for
 499      * expected common inputs.  Some processors operate very slowly on
 500      * subnormal operands, taking hundreds or thousands of cycles for
 501      * one floating-point add or multiply as opposed to, say, four
 502      * cycles for normal operands.  For processors with very slow
 503      * subnormal execution, scalb would be fastest if written entirely
 504      * with integer operations; in other words, scalb would need to
 505      * include the logic of performing correct rounding of subnormal
 506      * values.  This could be reasonably done in at most a few hundred
 507      * cycles.  However, this approach may penalize normal operations
 508      * since at least the exponent of the floating-point argument must
 509      * be examined.
 510      *
 511      * The approach taken in this implementation is a compromise.
 512      * Floating-point multiplication is used to do most of the work;
 513      * but knowingly multiplying by a subnormal scaling factor is
 514      * avoided.  However, the floating-point argument is not examined
 515      * to see whether or not it is subnormal since subnormal inputs
 516      * are assumed to be rare.  At most three multiplies are needed to
 517      * scale from the largest to smallest exponent ranges (scaling
 518      * down, at most two multiplies are needed if subnormal scaling
 519      * factors are allowed).  However, in this implementation an
 520      * expensive integer remainder operation is avoided at the cost of
 521      * requiring five floating-point multiplies in the worst case,
 522      * which should still be a performance win.
 523      *
 524      * If scaling of entire arrays is a concern, it would probably be
 525      * more efficient to provide a double[] scalb(double[], int)
 526      * version of scalb to avoid having to recompute the needed
 527      * scaling factors for each floating-point value.
 528      */
 529 
 530     /**
 531      * Return {@code d} &times;
 532      * 2<sup>{@code scale_factor}</sup> rounded as if performed
 533      * by a single correctly rounded floating-point multiply to a
 534      * member of the double value set.  See <a
 535      * href="http://java.sun.com/docs/books/jls/second_edition/html/typesValues.doc.html#9208">&sect;4.2.3</a>
 536      * of the <a href="http://java.sun.com/docs/books/jls/html/">Java
 537      * Language Specification</a> for a discussion of floating-point
 538      * value sets.  If the exponent of the result is between the
 539      * {@code double}'s minimum exponent and maximum exponent,
 540      * the answer is calculated exactly.  If the exponent of the
 541      * result would be larger than {@code doubles}'s maximum
 542      * exponent, an infinity is returned.  Note that if the result is
 543      * subnormal, precision may be lost; that is, when {@code scalb(x,
 544      * n)} is subnormal, {@code scalb(scalb(x, n), -n)} may
 545      * not equal <i>x</i>.  When the result is non-NaN, the result has
 546      * the same sign as {@code d}.
 547      *
 548      *<p>
 549      * Special cases:
 550      * <ul>
 551      * <li> If the first argument is NaN, NaN is returned.
 552      * <li> If the first argument is infinite, then an infinity of the
 553      * same sign is returned.
 554      * <li> If the first argument is zero, then a zero of the same
 555      * sign is returned.
 556      * </ul>
 557      *
 558      * @param d number to be scaled by a power of two.
 559      * @param scale_factor power of 2 used to scale {@code d}
 560      * @return {@code d * }2<sup>{@code scale_factor}</sup>
 561      * @author Joseph D. Darcy
 562      */
 563     public static double scalb(double d, int scale_factor) {
 564         /*
 565          * This method does not need to be declared strictfp to
 566          * compute the same correct result on all platforms.  When
 567          * scaling up, it does not matter what order the
 568          * multiply-store operations are done; the result will be
 569          * finite or overflow regardless of the operation ordering.
 570          * However, to get the correct result when scaling down, a
 571          * particular ordering must be used.
 572          *
 573          * When scaling down, the multiply-store operations are
 574          * sequenced so that it is not possible for two consecutive
 575          * multiply-stores to return subnormal results.  If one
 576          * multiply-store result is subnormal, the next multiply will
 577          * round it away to zero.  This is done by first multiplying
 578          * by 2 ^ (scale_factor % n) and then multiplying several
 579          * times by by 2^n as needed where n is the exponent of number
 580          * that is a covenient power of two.  In this way, at most one


 621             scale_increment = 512;
 622             exp_delta = twoToTheDoubleScaleUp;
 623         }
 624 
 625         // Calculate (scale_factor % +/-512), 512 = 2^9, using
 626         // technique from "Hacker's Delight" section 10-2.
 627         int t = (scale_factor >> 9-1) >>> 32 - 9;
 628         exp_adjust = ((scale_factor + t) & (512 -1)) - t;
 629 
 630         d *= powerOfTwoD(exp_adjust);
 631         scale_factor -= exp_adjust;
 632 
 633         while(scale_factor != 0) {
 634             d *= exp_delta;
 635             scale_factor -= scale_increment;
 636         }
 637         return d;
 638     }
 639 
 640     /**
 641      * Return {@code f} &times;
 642      * 2<sup>{@code scale_factor}</sup> rounded as if performed
 643      * by a single correctly rounded floating-point multiply to a
 644      * member of the float value set.  See <a
 645      * href="http://java.sun.com/docs/books/jls/second_edition/html/typesValues.doc.html#9208">&sect;4.2.3</a>
 646      * of the <a href="http://java.sun.com/docs/books/jls/html/">Java
 647      * Language Specification</a> for a discussion of floating-point
 648      * value set. If the exponent of the result is between the
 649      * {@code float}'s minimum exponent and maximum exponent, the
 650      * answer is calculated exactly.  If the exponent of the result
 651      * would be larger than {@code float}'s maximum exponent, an
 652      * infinity is returned.  Note that if the result is subnormal,
 653      * precision may be lost; that is, when {@code scalb(x, n)}
 654      * is subnormal, {@code scalb(scalb(x, n), -n)} may not equal
 655      * <i>x</i>.  When the result is non-NaN, the result has the same
 656      * sign as {@code f}.
 657      *
 658      *<p>
 659      * Special cases:
 660      * <ul>
 661      * <li> If the first argument is NaN, NaN is returned.
 662      * <li> If the first argument is infinite, then an infinity of the
 663      * same sign is returned.
 664      * <li> If the first argument is zero, then a zero of the same
 665      * sign is returned.
 666      * </ul>
 667      *
 668      * @param f number to be scaled by a power of two.
 669      * @param scale_factor power of 2 used to scale {@code f}
 670      * @return {@code f * }2<sup>{@code scale_factor}</sup>
 671      * @author Joseph D. Darcy
 672      */
 673      public static float scalb(float f, int scale_factor) {
 674         // magnitude of a power of two so large that scaling a finite
 675         // nonzero value by it would be guaranteed to over or
 676         // underflow; due to rounding, scaling down takes takes an
 677         // additional power of two which is reflected here
 678         final int MAX_SCALE = FloatConsts.MAX_EXPONENT + -FloatConsts.MIN_EXPONENT +
 679                               FloatConsts.SIGNIFICAND_WIDTH + 1;
 680 
 681         // Make sure scaling factor is in a reasonable range
 682         scale_factor = Math.max(Math.min(scale_factor, MAX_SCALE), -MAX_SCALE);
 683 
 684         /*
 685          * Since + MAX_SCALE for float fits well within the double
 686          * exponent range and + float -> double conversion is exact
 687          * the multiplication below will be exact. Therefore, the
 688          * rounding that occurs when the double product is cast to
 689          * float will be the correctly rounded float result.  Since
 690          * all operations other than the final multiply will be exact,
 691          * it is not necessary to declare this method strictfp.
 692          */
 693         return (float)((double)f*powerOfTwoD(scale_factor));
 694     }
 695 
 696     /**
 697      * Returns the floating-point number adjacent to the first
 698      * argument in the direction of the second argument.  If both
 699      * arguments compare as equal the second argument is returned.
 700      *
 701      * <p>
 702      * Special cases:
 703      * <ul>
 704      * <li> If either argument is a NaN, then NaN is returned.
 705      *
 706      * <li> If both arguments are signed zeros, {@code direction}
 707      * is returned unchanged (as implied by the requirement of
 708      * returning the second argument if the arguments compare as
 709      * equal).
 710      *
 711      * <li> If {@code start} is
 712      * &plusmn;{@code Double.MIN_VALUE} and {@code direction}
 713      * has a value such that the result should have a smaller
 714      * magnitude, then a zero with the same sign as {@code start}
 715      * is returned.
 716      *
 717      * <li> If {@code start} is infinite and
 718      * {@code direction} has a value such that the result should
 719      * have a smaller magnitude, {@code Double.MAX_VALUE} with the
 720      * same sign as {@code start} is returned.
 721      *
 722      * <li> If {@code start} is equal to &plusmn;
 723      * {@code Double.MAX_VALUE} and {@code direction} has a
 724      * value such that the result should have a larger magnitude, an
 725      * infinity with same sign as {@code start} is returned.
 726      * </ul>
 727      *
 728      * @param start     starting floating-point value
 729      * @param direction value indicating which of
 730      * {@code start}'s neighbors or {@code start} should
 731      * be returned
 732      * @return The floating-point number adjacent to {@code start} in the
 733      * direction of {@code direction}.
 734      * @author Joseph D. Darcy
 735      */
 736     public static double nextAfter(double start, double direction) {
 737         /*
 738          * The cases:
 739          *
 740          * nextAfter(+infinity, 0)  == MAX_VALUE
 741          * nextAfter(+infinity, +infinity)  == +infinity
 742          * nextAfter(-infinity, 0)  == -MAX_VALUE
 743          * nextAfter(-infinity, -infinity)  == -infinity
 744          *
 745          * are naturally handled without any additional testing
 746          */
 747 
 748         // First check for NaN values
 749         if (isNaN(start) || isNaN(direction)) {
 750             // return a NaN derived from the input NaN(s)
 751             return start + direction;
 752         } else if (start == direction) {
 753             return direction;


 786                      * explicitly.
 787                      */
 788                     else
 789                         transducer = DoubleConsts.SIGN_BIT_MASK | 1L;
 790             }
 791 
 792             return Double.longBitsToDouble(transducer);
 793         }
 794     }
 795 
 796     /**
 797      * Returns the floating-point number adjacent to the first
 798      * argument in the direction of the second argument.  If both
 799      * arguments compare as equal, the second argument is returned.
 800      *
 801      * <p>
 802      * Special cases:
 803      * <ul>
 804      * <li> If either argument is a NaN, then NaN is returned.
 805      *
 806      * <li> If both arguments are signed zeros, a {@code float}
 807      * zero with the same sign as {@code direction} is returned
 808      * (as implied by the requirement of returning the second argument
 809      * if the arguments compare as equal).
 810      *
 811      * <li> If {@code start} is
 812      * &plusmn;{@code Float.MIN_VALUE} and {@code direction}
 813      * has a value such that the result should have a smaller
 814      * magnitude, then a zero with the same sign as {@code start}
 815      * is returned.
 816      *
 817      * <li> If {@code start} is infinite and
 818      * {@code direction} has a value such that the result should
 819      * have a smaller magnitude, {@code Float.MAX_VALUE} with the
 820      * same sign as {@code start} is returned.
 821      *
 822      * <li> If {@code start} is equal to &plusmn;
 823      * {@code Float.MAX_VALUE} and {@code direction} has a
 824      * value such that the result should have a larger magnitude, an
 825      * infinity with same sign as {@code start} is returned.
 826      * </ul>
 827      *
 828      * @param start     starting floating-point value
 829      * @param direction value indicating which of
 830      * {@code start}'s neighbors or {@code start} should
 831      * be returned
 832      * @return The floating-point number adjacent to {@code start} in the
 833      * direction of {@code direction}.
 834      * @author Joseph D. Darcy
 835      */
 836      public static float nextAfter(float start, double direction) {
 837         /*
 838          * The cases:
 839          *
 840          * nextAfter(+infinity, 0)  == MAX_VALUE
 841          * nextAfter(+infinity, +infinity)  == +infinity
 842          * nextAfter(-infinity, 0)  == -MAX_VALUE
 843          * nextAfter(-infinity, -infinity)  == -infinity
 844          *
 845          * are naturally handled without any additional testing
 846          */
 847 
 848         // First check for NaN values
 849         if (isNaN(start) || isNaN(direction)) {
 850             // return a NaN derived from the input NaN(s)
 851             return start + (float)direction;
 852         } else if (start == direction) {
 853             return (float)direction;


 877                 else
 878                     if (transducer < 0 )
 879                         ++transducer;
 880                     /*
 881                      * transducer==0, the result is -MIN_VALUE
 882                      *
 883                      * The transition from zero (implicitly
 884                      * positive) to the smallest negative
 885                      * signed magnitude value must be done
 886                      * explicitly.
 887                      */
 888                     else
 889                         transducer = FloatConsts.SIGN_BIT_MASK | 1;
 890             }
 891 
 892             return Float.intBitsToFloat(transducer);
 893         }
 894     }
 895 
 896     /**
 897      * Returns the floating-point value adjacent to {@code d} in
 898      * the direction of positive infinity.  This method is
 899      * semantically equivalent to {@code nextAfter(d,
 900      * Double.POSITIVE_INFINITY)}; however, a {@code nextUp}
 901      * implementation may run faster than its equivalent
 902      * {@code nextAfter} call.
 903      *
 904      * <p>Special Cases:
 905      * <ul>
 906      * <li> If the argument is NaN, the result is NaN.
 907      *
 908      * <li> If the argument is positive infinity, the result is
 909      * positive infinity.
 910      *
 911      * <li> If the argument is zero, the result is
 912      * {@code Double.MIN_VALUE}
 913      *
 914      * </ul>
 915      *
 916      * @param d  starting floating-point value
 917      * @return The adjacent floating-point value closer to positive
 918      * infinity.
 919      * @author Joseph D. Darcy
 920      */
 921     public static double nextUp(double d) {
 922         if( isNaN(d) || d == Double.POSITIVE_INFINITY)
 923             return d;
 924         else {
 925             d += 0.0d;
 926             return Double.longBitsToDouble(Double.doubleToRawLongBits(d) +
 927                                            ((d >= 0.0d)?+1L:-1L));
 928         }
 929     }
 930 
 931     /**
 932      * Returns the floating-point value adjacent to {@code f} in
 933      * the direction of positive infinity.  This method is
 934      * semantically equivalent to {@code nextAfter(f,
 935      * Double.POSITIVE_INFINITY)}; however, a {@code nextUp}
 936      * implementation may run faster than its equivalent
 937      * {@code nextAfter} call.
 938      *
 939      * <p>Special Cases:
 940      * <ul>
 941      * <li> If the argument is NaN, the result is NaN.
 942      *
 943      * <li> If the argument is positive infinity, the result is
 944      * positive infinity.
 945      *
 946      * <li> If the argument is zero, the result is
 947      * {@code Float.MIN_VALUE}
 948      *
 949      * </ul>
 950      *
 951      * @param f  starting floating-point value
 952      * @return The adjacent floating-point value closer to positive
 953      * infinity.
 954      * @author Joseph D. Darcy
 955      */
 956      public static float nextUp(float f) {
 957         if( isNaN(f) || f == FloatConsts.POSITIVE_INFINITY)
 958             return f;
 959         else {
 960             f += 0.0f;
 961             return Float.intBitsToFloat(Float.floatToRawIntBits(f) +
 962                                         ((f >= 0.0f)?+1:-1));
 963         }
 964     }
 965 
 966     /**
 967      * Returns the floating-point value adjacent to {@code d} in
 968      * the direction of negative infinity.  This method is
 969      * semantically equivalent to {@code nextAfter(d,
 970      * Double.NEGATIVE_INFINITY)}; however, a
 971      * {@code nextDown} implementation may run faster than its
 972      * equivalent {@code nextAfter} call.
 973      *
 974      * <p>Special Cases:
 975      * <ul>
 976      * <li> If the argument is NaN, the result is NaN.
 977      *
 978      * <li> If the argument is negative infinity, the result is
 979      * negative infinity.
 980      *
 981      * <li> If the argument is zero, the result is
 982      * {@code -Double.MIN_VALUE}
 983      *
 984      * </ul>
 985      *
 986      * @param d  starting floating-point value
 987      * @return The adjacent floating-point value closer to negative
 988      * infinity.
 989      * @author Joseph D. Darcy
 990      */
 991     public static double nextDown(double d) {
 992         if( isNaN(d) || d == Double.NEGATIVE_INFINITY)
 993             return d;
 994         else {
 995             if (d == 0.0)
 996                 return -Double.MIN_VALUE;
 997             else
 998                 return Double.longBitsToDouble(Double.doubleToRawLongBits(d) +
 999                                                ((d > 0.0d)?-1L:+1L));
1000         }
1001     }
1002 
1003     /**
1004      * Returns the floating-point value adjacent to {@code f} in
1005      * the direction of negative infinity.  This method is
1006      * semantically equivalent to {@code nextAfter(f,
1007      * Float.NEGATIVE_INFINITY)}; however, a
1008      * {@code nextDown} implementation may run faster than its
1009      * equivalent {@code nextAfter} call.
1010      *
1011      * <p>Special Cases:
1012      * <ul>
1013      * <li> If the argument is NaN, the result is NaN.
1014      *
1015      * <li> If the argument is negative infinity, the result is
1016      * negative infinity.
1017      *
1018      * <li> If the argument is zero, the result is
1019      * {@code -Float.MIN_VALUE}
1020      *
1021      * </ul>
1022      *
1023      * @param f  starting floating-point value
1024      * @return The adjacent floating-point value closer to negative
1025      * infinity.
1026      * @author Joseph D. Darcy
1027      */
1028     public static double nextDown(float f) {
1029         if( isNaN(f) || f == Float.NEGATIVE_INFINITY)
1030             return f;
1031         else {
1032             if (f == 0.0f)
1033                 return -Float.MIN_VALUE;
1034             else
1035                 return Float.intBitsToFloat(Float.floatToRawIntBits(f) +
1036                                             ((f > 0.0f)?-1:+1));
1037         }
1038     }
1039 
1040     /**
1041      * Returns the first floating-point argument with the sign of the
1042      * second floating-point argument.  For this method, a NaN
1043      * {@code sign} argument is always treated as if it were
1044      * positive.
1045      *
1046      * @param magnitude  the parameter providing the magnitude of the result
1047      * @param sign   the parameter providing the sign of the result
1048      * @return a value with the magnitude of {@code magnitude}
1049      * and the sign of {@code sign}.
1050      * @author Joseph D. Darcy
1051      * @since 1.5
1052      */
1053     public static double copySign(double magnitude, double sign) {
1054         return rawCopySign(magnitude, (isNaN(sign)?1.0d:sign));
1055     }
1056 
1057     /**
1058      * Returns the first floating-point argument with the sign of the
1059      * second floating-point argument.  For this method, a NaN
1060      * {@code sign} argument is always treated as if it were
1061      * positive.
1062      *
1063      * @param magnitude  the parameter providing the magnitude of the result
1064      * @param sign   the parameter providing the sign of the result
1065      * @return a value with the magnitude of {@code magnitude}
1066      * and the sign of {@code sign}.
1067      * @author Joseph D. Darcy
1068      */
1069      public static float copySign(float magnitude, float sign) {
1070         return rawCopySign(magnitude, (isNaN(sign)?1.0f:sign));
1071     }
1072 
1073     /**
1074      * Returns the size of an ulp of the argument.  An ulp of a
1075      * {@code double} value is the positive distance between this
1076      * floating-point value and the {@code double} value next
1077      * larger in magnitude.  Note that for non-NaN <i>x</i>,
1078      * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>.
1079      *
1080      * <p>Special Cases:
1081      * <ul>
1082      * <li> If the argument is NaN, then the result is NaN.
1083      * <li> If the argument is positive or negative infinity, then the
1084      * result is positive infinity.
1085      * <li> If the argument is positive or negative zero, then the result is
1086      * {@code Double.MIN_VALUE}.
1087      * <li> If the argument is &plusmn;{@code Double.MAX_VALUE}, then
1088      * the result is equal to 2<sup>971</sup>.
1089      * </ul>
1090      *
1091      * @param d the floating-point value whose ulp is to be returned
1092      * @return the size of an ulp of the argument
1093      * @author Joseph D. Darcy
1094      * @since 1.5
1095      */
1096     public static double ulp(double d) {
1097         int exp = getExponent(d);
1098 
1099         switch(exp) {
1100         case DoubleConsts.MAX_EXPONENT+1:       // NaN or infinity
1101             return Math.abs(d);

1102 
1103         case DoubleConsts.MIN_EXPONENT-1:       // zero or subnormal
1104             return Double.MIN_VALUE;

1105 
1106         default:
1107             assert exp <= DoubleConsts.MAX_EXPONENT && exp >= DoubleConsts.MIN_EXPONENT;
1108 
1109             // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
1110             exp = exp - (DoubleConsts.SIGNIFICAND_WIDTH-1);
1111             if (exp >= DoubleConsts.MIN_EXPONENT) {
1112                 return powerOfTwoD(exp);
1113             }
1114             else {
1115                 // return a subnormal result; left shift integer
1116                 // representation of Double.MIN_VALUE appropriate
1117                 // number of positions
1118                 return Double.longBitsToDouble(1L <<
1119                 (exp - (DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1)) ));
1120             }

1121         }
1122     }
1123 
1124     /**
1125      * Returns the size of an ulp of the argument.  An ulp of a
1126      * {@code float} value is the positive distance between this
1127      * floating-point value and the {@code float} value next
1128      * larger in magnitude.  Note that for non-NaN <i>x</i>,
1129      * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>.
1130      *
1131      * <p>Special Cases:
1132      * <ul>
1133      * <li> If the argument is NaN, then the result is NaN.
1134      * <li> If the argument is positive or negative infinity, then the
1135      * result is positive infinity.
1136      * <li> If the argument is positive or negative zero, then the result is
1137      * {@code Float.MIN_VALUE}.
1138      * <li> If the argument is &plusmn;{@code Float.MAX_VALUE}, then
1139      * the result is equal to 2<sup>104</sup>.
1140      * </ul>
1141      *
1142      * @param f the floating-point value whose ulp is to be returned
1143      * @return the size of an ulp of the argument
1144      * @author Joseph D. Darcy
1145      * @since 1.5
1146      */
1147      public static float ulp(float f) {
1148         int exp = getExponent(f);
1149 
1150         switch(exp) {
1151         case FloatConsts.MAX_EXPONENT+1:        // NaN or infinity
1152             return Math.abs(f);

1153 
1154         case FloatConsts.MIN_EXPONENT-1:        // zero or subnormal
1155             return FloatConsts.MIN_VALUE;

1156 
1157         default:
1158             assert exp <= FloatConsts.MAX_EXPONENT && exp >= FloatConsts.MIN_EXPONENT;
1159 
1160             // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
1161             exp = exp - (FloatConsts.SIGNIFICAND_WIDTH-1);
1162             if (exp >= FloatConsts.MIN_EXPONENT) {
1163                 return powerOfTwoF(exp);
1164             }
1165             else {
1166                 // return a subnormal result; left shift integer
1167                 // representation of FloatConsts.MIN_VALUE appropriate
1168                 // number of positions
1169                 return Float.intBitsToFloat(1 <<
1170                 (exp - (FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1)) ));
1171             }

1172         }
1173      }
1174 
1175     /**
1176      * Returns the signum function of the argument; zero if the argument
1177      * is zero, 1.0 if the argument is greater than zero, -1.0 if the
1178      * argument is less than zero.
1179      *
1180      * <p>Special Cases:
1181      * <ul>
1182      * <li> If the argument is NaN, then the result is NaN.
1183      * <li> If the argument is positive zero or negative zero, then the
1184      *      result is the same as the argument.
1185      * </ul>
1186      *
1187      * @param d the floating-point value whose signum is to be returned
1188      * @return the signum function of the argument
1189      * @author Joseph D. Darcy
1190      * @since 1.5
1191      */