1 /*
2 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
108 * Many of this class's methods are members of the set of IEEE 754
109 * recommended functions or similar functions recommended or
110 * required by IEEE 754R. Discussion of various implementation
111 * techniques for these functions have occurred in:
112 *
113 * W.J. Cody and Jerome T. Coonen, "Algorithm 772 Functions to
114 * Support the IEEE Standard for Binary Floating-Point
115 * Arithmetic," ACM Transactions on Mathematical Software,
116 * vol. 19, no. 4, December 1993, pp. 443-451.
117 *
118 * Joseph D. Darcy, "Writing robust IEEE recommended functions in
119 * ``100% Pure Java''(TM)," University of California, Berkeley
120 * technical report UCB//CSD-98-1009.
121 */
122
123 /**
124 * Don't let anyone instantiate this class.
125 */
126 private FpUtils() {}
127
128 // Constants used in scalb
129 static double twoToTheDoubleScaleUp = powerOfTwoD(512);
130 static double twoToTheDoubleScaleDown = powerOfTwoD(-512);
131
132 // Helper Methods
133
134 // The following helper methods are used in the implementation of
135 // the public recommended functions; they generally omit certain
136 // tests for exception cases.
137
138 /**
139 * Returns unbiased exponent of a {@code double}.
140 */
141 public static int getExponent(double d){
142 /*
143 * Bitwise convert d to long, mask out exponent bits, shift
144 * to the right and then subtract out double's bias adjust to
145 * get true exponent value.
146 */
147 return (int)(((Double.doubleToRawLongBits(d) & DoubleConsts.EXP_BIT_MASK) >>
148 (DoubleConsts.SIGNIFICAND_WIDTH - 1)) - DoubleConsts.EXP_BIAS);
149 }
150
151 /**
152 * Returns unbiased exponent of a {@code float}.
153 */
154 public static int getExponent(float f){
155 /*
156 * Bitwise convert f to integer, mask out exponent bits, shift
157 * to the right and then subtract out float's bias adjust to
158 * get true exponent value
159 */
160 return ((Float.floatToRawIntBits(f) & FloatConsts.EXP_BIT_MASK) >>
161 (FloatConsts.SIGNIFICAND_WIDTH - 1)) - FloatConsts.EXP_BIAS;
162 }
163
164 /**
165 * Returns a floating-point power of two in the normal range.
166 */
167 static double powerOfTwoD(int n) {
168 assert(n >= DoubleConsts.MIN_EXPONENT && n <= DoubleConsts.MAX_EXPONENT);
169 return Double.longBitsToDouble((((long)n + (long)DoubleConsts.EXP_BIAS) <<
170 (DoubleConsts.SIGNIFICAND_WIDTH-1))
171 & DoubleConsts.EXP_BIT_MASK);
172 }
173
174 /**
175 * Returns a floating-point power of two in the normal range.
176 */
177 static float powerOfTwoF(int n) {
178 assert(n >= FloatConsts.MIN_EXPONENT && n <= FloatConsts.MAX_EXPONENT);
179 return Float.intBitsToFloat(((n + FloatConsts.EXP_BIAS) <<
180 (FloatConsts.SIGNIFICAND_WIDTH-1))
181 & FloatConsts.EXP_BIT_MASK);
182 }
183
184 /**
185 * Returns the first floating-point argument with the sign of the
186 * second floating-point argument. Note that unlike the {@link
187 * FpUtils#copySign(double, double) copySign} method, this method
188 * does not require NaN {@code sign} arguments to be treated
189 * as positive values; implementations are permitted to treat some
190 * NaN arguments as positive and other NaN arguments as negative
191 * to allow greater performance.
192 *
193 * @param magnitude the parameter providing the magnitude of the result
194 * @param sign the parameter providing the sign of the result
195 * @return a value with the magnitude of {@code magnitude}
196 * and the sign of {@code sign}.
197 * @author Joseph D. Darcy
198 */
199 public static double rawCopySign(double magnitude, double sign) {
200 return Double.longBitsToDouble((Double.doubleToRawLongBits(sign) &
201 (DoubleConsts.SIGN_BIT_MASK)) |
202 (Double.doubleToRawLongBits(magnitude) &
203 (DoubleConsts.EXP_BIT_MASK |
204 DoubleConsts.SIGNIF_BIT_MASK)));
205 }
206
207 /**
208 * Returns the first floating-point argument with the sign of the
209 * second floating-point argument. Note that unlike the {@link
210 * FpUtils#copySign(float, float) copySign} method, this method
211 * does not require NaN {@code sign} arguments to be treated
212 * as positive values; implementations are permitted to treat some
213 * NaN arguments as positive and other NaN arguments as negative
214 * to allow greater performance.
215 *
216 * @param magnitude the parameter providing the magnitude of the result
217 * @param sign the parameter providing the sign of the result
218 * @return a value with the magnitude of {@code magnitude}
219 * and the sign of {@code sign}.
220 * @author Joseph D. Darcy
221 */
222 public static float rawCopySign(float magnitude, float sign) {
223 return Float.intBitsToFloat((Float.floatToRawIntBits(sign) &
224 (FloatConsts.SIGN_BIT_MASK)) |
225 (Float.floatToRawIntBits(magnitude) &
226 (FloatConsts.EXP_BIT_MASK |
227 FloatConsts.SIGNIF_BIT_MASK)));
228 }
229
230 /* ***************************************************************** */
231
232 /**
233 * Returns {@code true} if the argument is a finite
234 * floating-point value; returns {@code false} otherwise (for
235 * NaN and infinity arguments).
236 *
237 * @param d the {@code double} value to be tested
238 * @return {@code true} if the argument is a finite
239 * floating-point value, {@code false} otherwise.
240 */
241 public static boolean isFinite(double d) {
242 return Math.abs(d) <= DoubleConsts.MAX_VALUE;
243 }
244
245 /**
246 * Returns {@code true} if the argument is a finite
247 * floating-point value; returns {@code false} otherwise (for
541 * exponent, an infinity is returned. Note that if the result is
542 * subnormal, precision may be lost; that is, when {@code scalb(x,
543 * n)} is subnormal, {@code scalb(scalb(x, n), -n)} may
544 * not equal <i>x</i>. When the result is non-NaN, the result has
545 * the same sign as {@code d}.
546 *
547 *<p>
548 * Special cases:
549 * <ul>
550 * <li> If the first argument is NaN, NaN is returned.
551 * <li> If the first argument is infinite, then an infinity of the
552 * same sign is returned.
553 * <li> If the first argument is zero, then a zero of the same
554 * sign is returned.
555 * </ul>
556 *
557 * @param d number to be scaled by a power of two.
558 * @param scale_factor power of 2 used to scale {@code d}
559 * @return {@code d * }2<sup>{@code scale_factor}</sup>
560 * @author Joseph D. Darcy
561 */
562 public static double scalb(double d, int scale_factor) {
563 /*
564 * This method does not need to be declared strictfp to
565 * compute the same correct result on all platforms. When
566 * scaling up, it does not matter what order the
567 * multiply-store operations are done; the result will be
568 * finite or overflow regardless of the operation ordering.
569 * However, to get the correct result when scaling down, a
570 * particular ordering must be used.
571 *
572 * When scaling down, the multiply-store operations are
573 * sequenced so that it is not possible for two consecutive
574 * multiply-stores to return subnormal results. If one
575 * multiply-store result is subnormal, the next multiply will
576 * round it away to zero. This is done by first multiplying
577 * by 2 ^ (scale_factor % n) and then multiplying several
578 * times by by 2^n as needed where n is the exponent of number
579 * that is a covenient power of two. In this way, at most one
580 * real rounding error occurs. If the double value set is
581 * being used exclusively, the rounding will occur on a
582 * multiply. If the double-extended-exponent value set is
583 * being used, the products will (perhaps) be exact but the
584 * stores to d are guaranteed to round to the double value
585 * set.
586 *
587 * It is _not_ a valid implementation to first multiply d by
588 * 2^MIN_EXPONENT and then by 2 ^ (scale_factor %
589 * MIN_EXPONENT) since even in a strictfp program double
590 * rounding on underflow could occur; e.g. if the scale_factor
591 * argument was (MIN_EXPONENT - n) and the exponent of d was a
592 * little less than -(MIN_EXPONENT - n), meaning the final
593 * result would be subnormal.
594 *
595 * Since exact reproducibility of this method can be achieved
596 * without any undue performance burden, there is no
597 * compelling reason to allow double rounding on underflow in
598 * scalb.
599 */
600
601 // magnitude of a power of two so large that scaling a finite
602 // nonzero value by it would be guaranteed to over or
603 // underflow; due to rounding, scaling down takes takes an
604 // additional power of two which is reflected here
605 final int MAX_SCALE = DoubleConsts.MAX_EXPONENT + -DoubleConsts.MIN_EXPONENT +
606 DoubleConsts.SIGNIFICAND_WIDTH + 1;
607 int exp_adjust = 0;
608 int scale_increment = 0;
609 double exp_delta = Double.NaN;
610
611 // Make sure scaling factor is in a reasonable range
612
613 if(scale_factor < 0) {
614 scale_factor = Math.max(scale_factor, -MAX_SCALE);
615 scale_increment = -512;
616 exp_delta = twoToTheDoubleScaleDown;
617 }
618 else {
619 scale_factor = Math.min(scale_factor, MAX_SCALE);
620 scale_increment = 512;
621 exp_delta = twoToTheDoubleScaleUp;
622 }
623
624 // Calculate (scale_factor % +/-512), 512 = 2^9, using
625 // technique from "Hacker's Delight" section 10-2.
626 int t = (scale_factor >> 9-1) >>> 32 - 9;
627 exp_adjust = ((scale_factor + t) & (512 -1)) - t;
628
629 d *= powerOfTwoD(exp_adjust);
630 scale_factor -= exp_adjust;
631
632 while(scale_factor != 0) {
633 d *= exp_delta;
634 scale_factor -= scale_increment;
635 }
636 return d;
637 }
638
639 /**
640 * Return {@code f} ×
641 * 2<sup>{@code scale_factor}</sup> rounded as if performed
642 * by a single correctly rounded floating-point multiply to a
643 * member of the float value set. See section 4.2.3 of
644 * <cite>The Java™ Language Specification</cite>
645 * for a discussion of floating-point
646 * value sets. If the exponent of the result is between the
647 * {@code float}'s minimum exponent and maximum exponent, the
648 * answer is calculated exactly. If the exponent of the result
649 * would be larger than {@code float}'s maximum exponent, an
650 * infinity is returned. Note that if the result is subnormal,
651 * precision may be lost; that is, when {@code scalb(x, n)}
652 * is subnormal, {@code scalb(scalb(x, n), -n)} may not equal
653 * <i>x</i>. When the result is non-NaN, the result has the same
654 * sign as {@code f}.
655 *
656 *<p>
657 * Special cases:
658 * <ul>
659 * <li> If the first argument is NaN, NaN is returned.
660 * <li> If the first argument is infinite, then an infinity of the
661 * same sign is returned.
662 * <li> If the first argument is zero, then a zero of the same
663 * sign is returned.
664 * </ul>
665 *
666 * @param f number to be scaled by a power of two.
667 * @param scale_factor power of 2 used to scale {@code f}
668 * @return {@code f * }2<sup>{@code scale_factor}</sup>
669 * @author Joseph D. Darcy
670 */
671 public static float scalb(float f, int scale_factor) {
672 // magnitude of a power of two so large that scaling a finite
673 // nonzero value by it would be guaranteed to over or
674 // underflow; due to rounding, scaling down takes takes an
675 // additional power of two which is reflected here
676 final int MAX_SCALE = FloatConsts.MAX_EXPONENT + -FloatConsts.MIN_EXPONENT +
677 FloatConsts.SIGNIFICAND_WIDTH + 1;
678
679 // Make sure scaling factor is in a reasonable range
680 scale_factor = Math.max(Math.min(scale_factor, MAX_SCALE), -MAX_SCALE);
681
682 /*
683 * Since + MAX_SCALE for float fits well within the double
684 * exponent range and + float -> double conversion is exact
685 * the multiplication below will be exact. Therefore, the
686 * rounding that occurs when the double product is cast to
687 * float will be the correctly rounded float result. Since
688 * all operations other than the final multiply will be exact,
689 * it is not necessary to declare this method strictfp.
690 */
691 return (float)((double)f*powerOfTwoD(scale_factor));
692 }
693
694 /**
695 * Returns the floating-point number adjacent to the first
696 * argument in the direction of the second argument. If both
697 * arguments compare as equal the second argument is returned.
698 *
699 * <p>
700 * Special cases:
701 * <ul>
702 * <li> If either argument is a NaN, then NaN is returned.
703 *
704 * <li> If both arguments are signed zeros, {@code direction}
705 * is returned unchanged (as implied by the requirement of
706 * returning the second argument if the arguments compare as
707 * equal).
708 *
709 * <li> If {@code start} is
710 * ±{@code Double.MIN_VALUE} and {@code direction}
711 * has a value such that the result should have a smaller
713 * is returned.
714 *
715 * <li> If {@code start} is infinite and
716 * {@code direction} has a value such that the result should
717 * have a smaller magnitude, {@code Double.MAX_VALUE} with the
718 * same sign as {@code start} is returned.
719 *
720 * <li> If {@code start} is equal to ±
721 * {@code Double.MAX_VALUE} and {@code direction} has a
722 * value such that the result should have a larger magnitude, an
723 * infinity with same sign as {@code start} is returned.
724 * </ul>
725 *
726 * @param start starting floating-point value
727 * @param direction value indicating which of
728 * {@code start}'s neighbors or {@code start} should
729 * be returned
730 * @return The floating-point number adjacent to {@code start} in the
731 * direction of {@code direction}.
732 * @author Joseph D. Darcy
733 */
734 public static double nextAfter(double start, double direction) {
735 /*
736 * The cases:
737 *
738 * nextAfter(+infinity, 0) == MAX_VALUE
739 * nextAfter(+infinity, +infinity) == +infinity
740 * nextAfter(-infinity, 0) == -MAX_VALUE
741 * nextAfter(-infinity, -infinity) == -infinity
742 *
743 * are naturally handled without any additional testing
744 */
745
746 // First check for NaN values
747 if (isNaN(start) || isNaN(direction)) {
748 // return a NaN derived from the input NaN(s)
749 return start + direction;
750 } else if (start == direction) {
751 return direction;
752 } else { // start > direction or start < direction
753 // Add +0.0 to get rid of a -0.0 (+0.0 + -0.0 => +0.0)
754 // then bitwise convert start to integer.
755 long transducer = Double.doubleToRawLongBits(start + 0.0d);
756
757 /*
758 * IEEE 754 floating-point numbers are lexicographically
759 * ordered if treated as signed- magnitude integers .
760 * Since Java's integers are two's complement,
761 * incrementing" the two's complement representation of a
762 * logically negative floating-point value *decrements*
763 * the signed-magnitude representation. Therefore, when
764 * the integer representation of a floating-point values
765 * is less than zero, the adjustment to the representation
766 * is in the opposite direction than would be expected at
767 * first .
768 */
769 if (direction > start) { // Calculate next greater value
770 transducer = transducer + (transducer >= 0L ? 1L:-1L);
771 } else { // Calculate next lesser value
772 assert direction < start;
773 if (transducer > 0L)
774 --transducer;
775 else
776 if (transducer < 0L )
777 ++transducer;
778 /*
779 * transducer==0, the result is -MIN_VALUE
780 *
781 * The transition from zero (implicitly
782 * positive) to the smallest negative
783 * signed magnitude value must be done
784 * explicitly.
785 */
786 else
787 transducer = DoubleConsts.SIGN_BIT_MASK | 1L;
788 }
789
790 return Double.longBitsToDouble(transducer);
791 }
792 }
793
794 /**
795 * Returns the floating-point number adjacent to the first
796 * argument in the direction of the second argument. If both
797 * arguments compare as equal, the second argument is returned.
798 *
799 * <p>
800 * Special cases:
801 * <ul>
802 * <li> If either argument is a NaN, then NaN is returned.
803 *
804 * <li> If both arguments are signed zeros, a {@code float}
805 * zero with the same sign as {@code direction} is returned
806 * (as implied by the requirement of returning the second argument
807 * if the arguments compare as equal).
808 *
809 * <li> If {@code start} is
810 * ±{@code Float.MIN_VALUE} and {@code direction}
811 * has a value such that the result should have a smaller
813 * is returned.
814 *
815 * <li> If {@code start} is infinite and
816 * {@code direction} has a value such that the result should
817 * have a smaller magnitude, {@code Float.MAX_VALUE} with the
818 * same sign as {@code start} is returned.
819 *
820 * <li> If {@code start} is equal to ±
821 * {@code Float.MAX_VALUE} and {@code direction} has a
822 * value such that the result should have a larger magnitude, an
823 * infinity with same sign as {@code start} is returned.
824 * </ul>
825 *
826 * @param start starting floating-point value
827 * @param direction value indicating which of
828 * {@code start}'s neighbors or {@code start} should
829 * be returned
830 * @return The floating-point number adjacent to {@code start} in the
831 * direction of {@code direction}.
832 * @author Joseph D. Darcy
833 */
834 public static float nextAfter(float start, double direction) {
835 /*
836 * The cases:
837 *
838 * nextAfter(+infinity, 0) == MAX_VALUE
839 * nextAfter(+infinity, +infinity) == +infinity
840 * nextAfter(-infinity, 0) == -MAX_VALUE
841 * nextAfter(-infinity, -infinity) == -infinity
842 *
843 * are naturally handled without any additional testing
844 */
845
846 // First check for NaN values
847 if (isNaN(start) || isNaN(direction)) {
848 // return a NaN derived from the input NaN(s)
849 return start + (float)direction;
850 } else if (start == direction) {
851 return (float)direction;
852 } else { // start > direction or start < direction
853 // Add +0.0 to get rid of a -0.0 (+0.0 + -0.0 => +0.0)
854 // then bitwise convert start to integer.
855 int transducer = Float.floatToRawIntBits(start + 0.0f);
856
857 /*
858 * IEEE 754 floating-point numbers are lexicographically
859 * ordered if treated as signed- magnitude integers .
860 * Since Java's integers are two's complement,
861 * incrementing" the two's complement representation of a
862 * logically negative floating-point value *decrements*
863 * the signed-magnitude representation. Therefore, when
864 * the integer representation of a floating-point values
865 * is less than zero, the adjustment to the representation
866 * is in the opposite direction than would be expected at
867 * first.
868 */
869 if (direction > start) {// Calculate next greater value
870 transducer = transducer + (transducer >= 0 ? 1:-1);
871 } else { // Calculate next lesser value
872 assert direction < start;
873 if (transducer > 0)
874 --transducer;
875 else
876 if (transducer < 0 )
877 ++transducer;
878 /*
879 * transducer==0, the result is -MIN_VALUE
880 *
881 * The transition from zero (implicitly
882 * positive) to the smallest negative
883 * signed magnitude value must be done
884 * explicitly.
885 */
886 else
887 transducer = FloatConsts.SIGN_BIT_MASK | 1;
888 }
889
890 return Float.intBitsToFloat(transducer);
891 }
892 }
893
894 /**
895 * Returns the floating-point value adjacent to {@code d} in
896 * the direction of positive infinity. This method is
897 * semantically equivalent to {@code nextAfter(d,
898 * Double.POSITIVE_INFINITY)}; however, a {@code nextUp}
899 * implementation may run faster than its equivalent
900 * {@code nextAfter} call.
901 *
902 * <p>Special Cases:
903 * <ul>
904 * <li> If the argument is NaN, the result is NaN.
905 *
906 * <li> If the argument is positive infinity, the result is
907 * positive infinity.
908 *
909 * <li> If the argument is zero, the result is
910 * {@code Double.MIN_VALUE}
911 *
912 * </ul>
913 *
914 * @param d starting floating-point value
915 * @return The adjacent floating-point value closer to positive
916 * infinity.
917 * @author Joseph D. Darcy
918 */
919 public static double nextUp(double d) {
920 if( isNaN(d) || d == Double.POSITIVE_INFINITY)
921 return d;
922 else {
923 d += 0.0d;
924 return Double.longBitsToDouble(Double.doubleToRawLongBits(d) +
925 ((d >= 0.0d)?+1L:-1L));
926 }
927 }
928
929 /**
930 * Returns the floating-point value adjacent to {@code f} in
931 * the direction of positive infinity. This method is
932 * semantically equivalent to {@code nextAfter(f,
933 * Double.POSITIVE_INFINITY)}; however, a {@code nextUp}
934 * implementation may run faster than its equivalent
935 * {@code nextAfter} call.
936 *
937 * <p>Special Cases:
938 * <ul>
939 * <li> If the argument is NaN, the result is NaN.
940 *
941 * <li> If the argument is positive infinity, the result is
942 * positive infinity.
943 *
944 * <li> If the argument is zero, the result is
945 * {@code Float.MIN_VALUE}
946 *
947 * </ul>
948 *
949 * @param f starting floating-point value
950 * @return The adjacent floating-point value closer to positive
951 * infinity.
952 * @author Joseph D. Darcy
953 */
954 public static float nextUp(float f) {
955 if( isNaN(f) || f == FloatConsts.POSITIVE_INFINITY)
956 return f;
957 else {
958 f += 0.0f;
959 return Float.intBitsToFloat(Float.floatToRawIntBits(f) +
960 ((f >= 0.0f)?+1:-1));
961 }
962 }
963
964 /**
965 * Returns the floating-point value adjacent to {@code d} in
966 * the direction of negative infinity. This method is
967 * semantically equivalent to {@code nextAfter(d,
968 * Double.NEGATIVE_INFINITY)}; however, a
969 * {@code nextDown} implementation may run faster than its
970 * equivalent {@code nextAfter} call.
971 *
972 * <p>Special Cases:
973 * <ul>
974 * <li> If the argument is NaN, the result is NaN.
975 *
976 * <li> If the argument is negative infinity, the result is
977 * negative infinity.
978 *
979 * <li> If the argument is zero, the result is
980 * {@code -Double.MIN_VALUE}
981 *
1030 if (f == 0.0f)
1031 return -Float.MIN_VALUE;
1032 else
1033 return Float.intBitsToFloat(Float.floatToRawIntBits(f) +
1034 ((f > 0.0f)?-1:+1));
1035 }
1036 }
1037
1038 /**
1039 * Returns the first floating-point argument with the sign of the
1040 * second floating-point argument. For this method, a NaN
1041 * {@code sign} argument is always treated as if it were
1042 * positive.
1043 *
1044 * @param magnitude the parameter providing the magnitude of the result
1045 * @param sign the parameter providing the sign of the result
1046 * @return a value with the magnitude of {@code magnitude}
1047 * and the sign of {@code sign}.
1048 * @author Joseph D. Darcy
1049 * @since 1.5
1050 */
1051 public static double copySign(double magnitude, double sign) {
1052 return rawCopySign(magnitude, (isNaN(sign)?1.0d:sign));
1053 }
1054
1055 /**
1056 * Returns the first floating-point argument with the sign of the
1057 * second floating-point argument. For this method, a NaN
1058 * {@code sign} argument is always treated as if it were
1059 * positive.
1060 *
1061 * @param magnitude the parameter providing the magnitude of the result
1062 * @param sign the parameter providing the sign of the result
1063 * @return a value with the magnitude of {@code magnitude}
1064 * and the sign of {@code sign}.
1065 * @author Joseph D. Darcy
1066 */
1067 public static float copySign(float magnitude, float sign) {
1068 return rawCopySign(magnitude, (isNaN(sign)?1.0f:sign));
1069 }
1070
1071 /**
1072 * Returns the size of an ulp of the argument. An ulp of a
1073 * {@code double} value is the positive distance between this
1074 * floating-point value and the {@code double} value next
1075 * larger in magnitude. Note that for non-NaN <i>x</i>,
1076 * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>.
1077 *
1078 * <p>Special Cases:
1079 * <ul>
1080 * <li> If the argument is NaN, then the result is NaN.
1081 * <li> If the argument is positive or negative infinity, then the
1082 * result is positive infinity.
1083 * <li> If the argument is positive or negative zero, then the result is
1084 * {@code Double.MIN_VALUE}.
1085 * <li> If the argument is ±{@code Double.MAX_VALUE}, then
1086 * the result is equal to 2<sup>971</sup>.
1087 * </ul>
1088 *
1089 * @param d the floating-point value whose ulp is to be returned
1090 * @return the size of an ulp of the argument
1091 * @author Joseph D. Darcy
1092 * @since 1.5
1093 */
1094 public static double ulp(double d) {
1095 int exp = getExponent(d);
1096
1097 switch(exp) {
1098 case DoubleConsts.MAX_EXPONENT+1: // NaN or infinity
1099 return Math.abs(d);
1100
1101 case DoubleConsts.MIN_EXPONENT-1: // zero or subnormal
1102 return Double.MIN_VALUE;
1103
1104 default:
1105 assert exp <= DoubleConsts.MAX_EXPONENT && exp >= DoubleConsts.MIN_EXPONENT;
1106
1107 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
1108 exp = exp - (DoubleConsts.SIGNIFICAND_WIDTH-1);
1109 if (exp >= DoubleConsts.MIN_EXPONENT) {
1110 return powerOfTwoD(exp);
1111 }
1112 else {
1113 // return a subnormal result; left shift integer
1114 // representation of Double.MIN_VALUE appropriate
1115 // number of positions
1116 return Double.longBitsToDouble(1L <<
1117 (exp - (DoubleConsts.MIN_EXPONENT - (DoubleConsts.SIGNIFICAND_WIDTH-1)) ));
1118 }
1119 }
1120 }
1121
1122 /**
1123 * Returns the size of an ulp of the argument. An ulp of a
1124 * {@code float} value is the positive distance between this
1125 * floating-point value and the {@code float} value next
1126 * larger in magnitude. Note that for non-NaN <i>x</i>,
1127 * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>.
1128 *
1129 * <p>Special Cases:
1130 * <ul>
1131 * <li> If the argument is NaN, then the result is NaN.
1132 * <li> If the argument is positive or negative infinity, then the
1133 * result is positive infinity.
1134 * <li> If the argument is positive or negative zero, then the result is
1135 * {@code Float.MIN_VALUE}.
1136 * <li> If the argument is ±{@code Float.MAX_VALUE}, then
1137 * the result is equal to 2<sup>104</sup>.
1138 * </ul>
1139 *
1140 * @param f the floating-point value whose ulp is to be returned
1141 * @return the size of an ulp of the argument
1142 * @author Joseph D. Darcy
1143 * @since 1.5
1144 */
1145 public static float ulp(float f) {
1146 int exp = getExponent(f);
1147
1148 switch(exp) {
1149 case FloatConsts.MAX_EXPONENT+1: // NaN or infinity
1150 return Math.abs(f);
1151
1152 case FloatConsts.MIN_EXPONENT-1: // zero or subnormal
1153 return FloatConsts.MIN_VALUE;
1154
1155 default:
1156 assert exp <= FloatConsts.MAX_EXPONENT && exp >= FloatConsts.MIN_EXPONENT;
1157
1158 // ulp(x) is usually 2^(SIGNIFICAND_WIDTH-1)*(2^ilogb(x))
1159 exp = exp - (FloatConsts.SIGNIFICAND_WIDTH-1);
1160 if (exp >= FloatConsts.MIN_EXPONENT) {
1161 return powerOfTwoF(exp);
1162 }
1163 else {
1164 // return a subnormal result; left shift integer
1165 // representation of FloatConsts.MIN_VALUE appropriate
1166 // number of positions
1167 return Float.intBitsToFloat(1 <<
1168 (exp - (FloatConsts.MIN_EXPONENT - (FloatConsts.SIGNIFICAND_WIDTH-1)) ));
1169 }
1170 }
1171 }
1172
1173 /**
1174 * Returns the signum function of the argument; zero if the argument
1175 * is zero, 1.0 if the argument is greater than zero, -1.0 if the
1176 * argument is less than zero.
1177 *
1178 * <p>Special Cases:
1179 * <ul>
1180 * <li> If the argument is NaN, then the result is NaN.
1181 * <li> If the argument is positive zero or negative zero, then the
1182 * result is the same as the argument.
1183 * </ul>
1184 *
1185 * @param d the floating-point value whose signum is to be returned
1186 * @return the signum function of the argument
1187 * @author Joseph D. Darcy
1188 * @since 1.5
1189 */
1190 public static double signum(double d) {
1191 return (d == 0.0 || isNaN(d))?d:copySign(1.0, d);
1192 }
1193
1194 /**
1195 * Returns the signum function of the argument; zero if the argument
1196 * is zero, 1.0f if the argument is greater than zero, -1.0f if the
1197 * argument is less than zero.
1198 *
1199 * <p>Special Cases:
1200 * <ul>
1201 * <li> If the argument is NaN, then the result is NaN.
1202 * <li> If the argument is positive zero or negative zero, then the
1203 * result is the same as the argument.
1204 * </ul>
1205 *
1206 * @param f the floating-point value whose signum is to be returned
1207 * @return the signum function of the argument
1208 * @author Joseph D. Darcy
1209 * @since 1.5
1210 */
1211 public static float signum(float f) {
1212 return (f == 0.0f || isNaN(f))?f:copySign(1.0f, f);
1213 }
1214
1215 }
|
1 /*
2 * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
108 * Many of this class's methods are members of the set of IEEE 754
109 * recommended functions or similar functions recommended or
110 * required by IEEE 754R. Discussion of various implementation
111 * techniques for these functions have occurred in:
112 *
113 * W.J. Cody and Jerome T. Coonen, "Algorithm 772 Functions to
114 * Support the IEEE Standard for Binary Floating-Point
115 * Arithmetic," ACM Transactions on Mathematical Software,
116 * vol. 19, no. 4, December 1993, pp. 443-451.
117 *
118 * Joseph D. Darcy, "Writing robust IEEE recommended functions in
119 * ``100% Pure Java''(TM)," University of California, Berkeley
120 * technical report UCB//CSD-98-1009.
121 */
122
123 /**
124 * Don't let anyone instantiate this class.
125 */
126 private FpUtils() {}
127
128 // Helper Methods
129
130 // The following helper methods are used in the implementation of
131 // the public recommended functions; they generally omit certain
132 // tests for exception cases.
133
134 /**
135 * Returns unbiased exponent of a {@code double}.
136 * @deprecated Use Math.getExponent.
137 */
138 @Deprecated
139 public static int getExponent(double d){
140 return Math.getExponent(d);
141 }
142
143 /**
144 * Returns unbiased exponent of a {@code float}.
145 * @deprecated Use Math.getExponent.
146 */
147 @Deprecated
148 public static int getExponent(float f){
149 return Math.getExponent(f);
150 }
151
152
153 /**
154 * Returns the first floating-point argument with the sign of the
155 * second floating-point argument. Note that unlike the {@link
156 * FpUtils#copySign(double, double) copySign} method, this method
157 * does not require NaN {@code sign} arguments to be treated
158 * as positive values; implementations are permitted to treat some
159 * NaN arguments as positive and other NaN arguments as negative
160 * to allow greater performance.
161 *
162 * @param magnitude the parameter providing the magnitude of the result
163 * @param sign the parameter providing the sign of the result
164 * @return a value with the magnitude of {@code magnitude}
165 * and the sign of {@code sign}.
166 * @author Joseph D. Darcy
167 * @deprecated Use Math.copySign.
168 */
169 @Deprecated
170 public static double rawCopySign(double magnitude, double sign) {
171 return Math.copySign(magnitude, sign);
172 }
173
174 /**
175 * Returns the first floating-point argument with the sign of the
176 * second floating-point argument. Note that unlike the {@link
177 * FpUtils#copySign(float, float) copySign} method, this method
178 * does not require NaN {@code sign} arguments to be treated
179 * as positive values; implementations are permitted to treat some
180 * NaN arguments as positive and other NaN arguments as negative
181 * to allow greater performance.
182 *
183 * @param magnitude the parameter providing the magnitude of the result
184 * @param sign the parameter providing the sign of the result
185 * @return a value with the magnitude of {@code magnitude}
186 * and the sign of {@code sign}.
187 * @author Joseph D. Darcy
188 * @deprecated Use Math.copySign.
189 */
190 @Deprecated
191 public static float rawCopySign(float magnitude, float sign) {
192 return Math.copySign(magnitude, sign);
193 }
194
195 /* ***************************************************************** */
196
197 /**
198 * Returns {@code true} if the argument is a finite
199 * floating-point value; returns {@code false} otherwise (for
200 * NaN and infinity arguments).
201 *
202 * @param d the {@code double} value to be tested
203 * @return {@code true} if the argument is a finite
204 * floating-point value, {@code false} otherwise.
205 */
206 public static boolean isFinite(double d) {
207 return Math.abs(d) <= DoubleConsts.MAX_VALUE;
208 }
209
210 /**
211 * Returns {@code true} if the argument is a finite
212 * floating-point value; returns {@code false} otherwise (for
506 * exponent, an infinity is returned. Note that if the result is
507 * subnormal, precision may be lost; that is, when {@code scalb(x,
508 * n)} is subnormal, {@code scalb(scalb(x, n), -n)} may
509 * not equal <i>x</i>. When the result is non-NaN, the result has
510 * the same sign as {@code d}.
511 *
512 *<p>
513 * Special cases:
514 * <ul>
515 * <li> If the first argument is NaN, NaN is returned.
516 * <li> If the first argument is infinite, then an infinity of the
517 * same sign is returned.
518 * <li> If the first argument is zero, then a zero of the same
519 * sign is returned.
520 * </ul>
521 *
522 * @param d number to be scaled by a power of two.
523 * @param scale_factor power of 2 used to scale {@code d}
524 * @return {@code d * }2<sup>{@code scale_factor}</sup>
525 * @author Joseph D. Darcy
526 * @deprecated Use Math.scalb.
527 */
528 @Deprecated
529 public static double scalb(double d, int scale_factor) {
530 return Math.scalb(d, scale_factor);
531 }
532
533 /**
534 * Return {@code f} ×
535 * 2<sup>{@code scale_factor}</sup> rounded as if performed
536 * by a single correctly rounded floating-point multiply to a
537 * member of the float value set. See section 4.2.3 of
538 * <cite>The Java™ Language Specification</cite>
539 * for a discussion of floating-point
540 * value sets. If the exponent of the result is between the
541 * {@code float}'s minimum exponent and maximum exponent, the
542 * answer is calculated exactly. If the exponent of the result
543 * would be larger than {@code float}'s maximum exponent, an
544 * infinity is returned. Note that if the result is subnormal,
545 * precision may be lost; that is, when {@code scalb(x, n)}
546 * is subnormal, {@code scalb(scalb(x, n), -n)} may not equal
547 * <i>x</i>. When the result is non-NaN, the result has the same
548 * sign as {@code f}.
549 *
550 *<p>
551 * Special cases:
552 * <ul>
553 * <li> If the first argument is NaN, NaN is returned.
554 * <li> If the first argument is infinite, then an infinity of the
555 * same sign is returned.
556 * <li> If the first argument is zero, then a zero of the same
557 * sign is returned.
558 * </ul>
559 *
560 * @param f number to be scaled by a power of two.
561 * @param scale_factor power of 2 used to scale {@code f}
562 * @return {@code f * }2<sup>{@code scale_factor}</sup>
563 * @author Joseph D. Darcy
564 * @deprecated Use Math.scalb.
565 */
566 @Deprecated
567 public static float scalb(float f, int scale_factor) {
568 return Math.scalb(f, scale_factor);
569 }
570
571 /**
572 * Returns the floating-point number adjacent to the first
573 * argument in the direction of the second argument. If both
574 * arguments compare as equal the second argument is returned.
575 *
576 * <p>
577 * Special cases:
578 * <ul>
579 * <li> If either argument is a NaN, then NaN is returned.
580 *
581 * <li> If both arguments are signed zeros, {@code direction}
582 * is returned unchanged (as implied by the requirement of
583 * returning the second argument if the arguments compare as
584 * equal).
585 *
586 * <li> If {@code start} is
587 * ±{@code Double.MIN_VALUE} and {@code direction}
588 * has a value such that the result should have a smaller
590 * is returned.
591 *
592 * <li> If {@code start} is infinite and
593 * {@code direction} has a value such that the result should
594 * have a smaller magnitude, {@code Double.MAX_VALUE} with the
595 * same sign as {@code start} is returned.
596 *
597 * <li> If {@code start} is equal to ±
598 * {@code Double.MAX_VALUE} and {@code direction} has a
599 * value such that the result should have a larger magnitude, an
600 * infinity with same sign as {@code start} is returned.
601 * </ul>
602 *
603 * @param start starting floating-point value
604 * @param direction value indicating which of
605 * {@code start}'s neighbors or {@code start} should
606 * be returned
607 * @return The floating-point number adjacent to {@code start} in the
608 * direction of {@code direction}.
609 * @author Joseph D. Darcy
610 * @deprecated Use Math.nextAfter
611 */
612 @Deprecated
613 public static double nextAfter(double start, double direction) {
614 return Math.nextAfter(start, direction);
615 }
616
617 /**
618 * Returns the floating-point number adjacent to the first
619 * argument in the direction of the second argument. If both
620 * arguments compare as equal, the second argument is returned.
621 *
622 * <p>
623 * Special cases:
624 * <ul>
625 * <li> If either argument is a NaN, then NaN is returned.
626 *
627 * <li> If both arguments are signed zeros, a {@code float}
628 * zero with the same sign as {@code direction} is returned
629 * (as implied by the requirement of returning the second argument
630 * if the arguments compare as equal).
631 *
632 * <li> If {@code start} is
633 * ±{@code Float.MIN_VALUE} and {@code direction}
634 * has a value such that the result should have a smaller
636 * is returned.
637 *
638 * <li> If {@code start} is infinite and
639 * {@code direction} has a value such that the result should
640 * have a smaller magnitude, {@code Float.MAX_VALUE} with the
641 * same sign as {@code start} is returned.
642 *
643 * <li> If {@code start} is equal to ±
644 * {@code Float.MAX_VALUE} and {@code direction} has a
645 * value such that the result should have a larger magnitude, an
646 * infinity with same sign as {@code start} is returned.
647 * </ul>
648 *
649 * @param start starting floating-point value
650 * @param direction value indicating which of
651 * {@code start}'s neighbors or {@code start} should
652 * be returned
653 * @return The floating-point number adjacent to {@code start} in the
654 * direction of {@code direction}.
655 * @author Joseph D. Darcy
656 * @deprecated Use Math.nextAfter.
657 */
658 @Deprecated
659 public static float nextAfter(float start, double direction) {
660 return Math.nextAfter(start, direction);
661 }
662
663 /**
664 * Returns the floating-point value adjacent to {@code d} in
665 * the direction of positive infinity. This method is
666 * semantically equivalent to {@code nextAfter(d,
667 * Double.POSITIVE_INFINITY)}; however, a {@code nextUp}
668 * implementation may run faster than its equivalent
669 * {@code nextAfter} call.
670 *
671 * <p>Special Cases:
672 * <ul>
673 * <li> If the argument is NaN, the result is NaN.
674 *
675 * <li> If the argument is positive infinity, the result is
676 * positive infinity.
677 *
678 * <li> If the argument is zero, the result is
679 * {@code Double.MIN_VALUE}
680 *
681 * </ul>
682 *
683 * @param d starting floating-point value
684 * @return The adjacent floating-point value closer to positive
685 * infinity.
686 * @author Joseph D. Darcy
687 * @deprecated use Math.nextUp.
688 */
689 @Deprecated
690 public static double nextUp(double d) {
691 return Math.nextUp(d);
692 }
693
694 /**
695 * Returns the floating-point value adjacent to {@code f} in
696 * the direction of positive infinity. This method is
697 * semantically equivalent to {@code nextAfter(f,
698 * Double.POSITIVE_INFINITY)}; however, a {@code nextUp}
699 * implementation may run faster than its equivalent
700 * {@code nextAfter} call.
701 *
702 * <p>Special Cases:
703 * <ul>
704 * <li> If the argument is NaN, the result is NaN.
705 *
706 * <li> If the argument is positive infinity, the result is
707 * positive infinity.
708 *
709 * <li> If the argument is zero, the result is
710 * {@code Float.MIN_VALUE}
711 *
712 * </ul>
713 *
714 * @param f starting floating-point value
715 * @return The adjacent floating-point value closer to positive
716 * infinity.
717 * @author Joseph D. Darcy
718 * @deprecated Use Math.nextUp.
719 */
720 @Deprecated
721 public static float nextUp(float f) {
722 return Math.nextUp(f);
723 }
724
725 /**
726 * Returns the floating-point value adjacent to {@code d} in
727 * the direction of negative infinity. This method is
728 * semantically equivalent to {@code nextAfter(d,
729 * Double.NEGATIVE_INFINITY)}; however, a
730 * {@code nextDown} implementation may run faster than its
731 * equivalent {@code nextAfter} call.
732 *
733 * <p>Special Cases:
734 * <ul>
735 * <li> If the argument is NaN, the result is NaN.
736 *
737 * <li> If the argument is negative infinity, the result is
738 * negative infinity.
739 *
740 * <li> If the argument is zero, the result is
741 * {@code -Double.MIN_VALUE}
742 *
791 if (f == 0.0f)
792 return -Float.MIN_VALUE;
793 else
794 return Float.intBitsToFloat(Float.floatToRawIntBits(f) +
795 ((f > 0.0f)?-1:+1));
796 }
797 }
798
799 /**
800 * Returns the first floating-point argument with the sign of the
801 * second floating-point argument. For this method, a NaN
802 * {@code sign} argument is always treated as if it were
803 * positive.
804 *
805 * @param magnitude the parameter providing the magnitude of the result
806 * @param sign the parameter providing the sign of the result
807 * @return a value with the magnitude of {@code magnitude}
808 * and the sign of {@code sign}.
809 * @author Joseph D. Darcy
810 * @since 1.5
811 * @deprecated Use StrictMath.copySign.
812 */
813 @Deprecated
814 public static double copySign(double magnitude, double sign) {
815 return StrictMath.copySign(magnitude, sign);
816 }
817
818 /**
819 * Returns the first floating-point argument with the sign of the
820 * second floating-point argument. For this method, a NaN
821 * {@code sign} argument is always treated as if it were
822 * positive.
823 *
824 * @param magnitude the parameter providing the magnitude of the result
825 * @param sign the parameter providing the sign of the result
826 * @return a value with the magnitude of {@code magnitude}
827 * and the sign of {@code sign}.
828 * @author Joseph D. Darcy
829 * @deprecated Use StrictMath.copySign.
830 */
831 @Deprecated
832 public static float copySign(float magnitude, float sign) {
833 return StrictMath.copySign(magnitude, sign);
834 }
835
836 /**
837 * Returns the size of an ulp of the argument. An ulp of a
838 * {@code double} value is the positive distance between this
839 * floating-point value and the {@code double} value next
840 * larger in magnitude. Note that for non-NaN <i>x</i>,
841 * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>.
842 *
843 * <p>Special Cases:
844 * <ul>
845 * <li> If the argument is NaN, then the result is NaN.
846 * <li> If the argument is positive or negative infinity, then the
847 * result is positive infinity.
848 * <li> If the argument is positive or negative zero, then the result is
849 * {@code Double.MIN_VALUE}.
850 * <li> If the argument is ±{@code Double.MAX_VALUE}, then
851 * the result is equal to 2<sup>971</sup>.
852 * </ul>
853 *
854 * @param d the floating-point value whose ulp is to be returned
855 * @return the size of an ulp of the argument
856 * @author Joseph D. Darcy
857 * @since 1.5
858 * @deprecated Use Math.ulp.
859 */
860 @Deprecated
861 public static double ulp(double d) {
862 return Math.ulp(d);
863 }
864
865 /**
866 * Returns the size of an ulp of the argument. An ulp of a
867 * {@code float} value is the positive distance between this
868 * floating-point value and the {@code float} value next
869 * larger in magnitude. Note that for non-NaN <i>x</i>,
870 * <code>ulp(-<i>x</i>) == ulp(<i>x</i>)</code>.
871 *
872 * <p>Special Cases:
873 * <ul>
874 * <li> If the argument is NaN, then the result is NaN.
875 * <li> If the argument is positive or negative infinity, then the
876 * result is positive infinity.
877 * <li> If the argument is positive or negative zero, then the result is
878 * {@code Float.MIN_VALUE}.
879 * <li> If the argument is ±{@code Float.MAX_VALUE}, then
880 * the result is equal to 2<sup>104</sup>.
881 * </ul>
882 *
883 * @param f the floating-point value whose ulp is to be returned
884 * @return the size of an ulp of the argument
885 * @author Joseph D. Darcy
886 * @since 1.5
887 * @deprecated Use Math.ulp.
888 */
889 @Deprecated
890 public static float ulp(float f) {
891 return Math.ulp(f);
892 }
893
894 /**
895 * Returns the signum function of the argument; zero if the argument
896 * is zero, 1.0 if the argument is greater than zero, -1.0 if the
897 * argument is less than zero.
898 *
899 * <p>Special Cases:
900 * <ul>
901 * <li> If the argument is NaN, then the result is NaN.
902 * <li> If the argument is positive zero or negative zero, then the
903 * result is the same as the argument.
904 * </ul>
905 *
906 * @param d the floating-point value whose signum is to be returned
907 * @return the signum function of the argument
908 * @author Joseph D. Darcy
909 * @since 1.5
910 * @deprecated Use Math.signum.
911 */
912 @Deprecated
913 public static double signum(double d) {
914 return Math.signum(d);
915 }
916
917 /**
918 * Returns the signum function of the argument; zero if the argument
919 * is zero, 1.0f if the argument is greater than zero, -1.0f if the
920 * argument is less than zero.
921 *
922 * <p>Special Cases:
923 * <ul>
924 * <li> If the argument is NaN, then the result is NaN.
925 * <li> If the argument is positive zero or negative zero, then the
926 * result is the same as the argument.
927 * </ul>
928 *
929 * @param f the floating-point value whose signum is to be returned
930 * @return the signum function of the argument
931 * @author Joseph D. Darcy
932 * @since 1.5
933 * @deprecated Use Math.signum.
934 */
935 @Deprecated
936 public static float signum(float f) {
937 return Math.signum(f);
938 }
939 }
|