src/share/classes/sun/misc/FloatingDecimal.java

Print this page
rev 7487 : 7192954: Fix Float.parseFloat to round correctly and preserve monotonicity.
4396272: Parsing doubles fails to follow IEEE for largest decimal that should yield 0
7039391: Use Math.ulp in FloatingDecimal
Summary: Correct rounding and monotonicity problems in floats and doubles
Reviewed-by: martin
Contributed-by: Dmitry Nadezhin <dmitry.nadezhin@oracle.com>, Louis Wasserman <lowasser@google.com>

*** 47,62 **** --- 47,64 ---- static final int MIN_SMALL_BIN_EXP = -( 63 / 3 ); static final int MAX_DECIMAL_DIGITS = 15; static final int MAX_DECIMAL_EXPONENT = 308; static final int MIN_DECIMAL_EXPONENT = -324; static final int BIG_DECIMAL_EXPONENT = 324; // i.e. abs(MIN_DECIMAL_EXPONENT) + static final int MAX_NDIGITS = 1100; static final int SINGLE_EXP_SHIFT = FloatConsts.SIGNIFICAND_WIDTH - 1; static final int SINGLE_FRACT_HOB = 1<<SINGLE_EXP_SHIFT; static final int SINGLE_MAX_DECIMAL_DIGITS = 7; static final int SINGLE_MAX_DECIMAL_EXPONENT = 38; static final int SINGLE_MIN_DECIMAL_EXPONENT = -45; + static final int SINGLE_MAX_NDIGITS = 200; static final int INT_DECIMAL_DIGITS = 9; /** * Converts a double precision floating point value to a <code>String</code>.
*** 1000,1120 **** /** * A <code>ASCIIToBinaryConverter</code> container for a <code>double</code>. */ static class PreparedASCIIToBinaryBuffer implements ASCIIToBinaryConverter { final private double doubleVal; ! private int roundDir = 0; ! public PreparedASCIIToBinaryBuffer(double doubleVal) { this.doubleVal = doubleVal; ! } ! ! public PreparedASCIIToBinaryBuffer(double doubleVal, int roundDir) { ! this.doubleVal = doubleVal; ! this.roundDir = roundDir; } @Override public double doubleValue() { return doubleVal; } @Override public float floatValue() { ! return stickyRound(doubleVal,roundDir); } } ! static final ASCIIToBinaryConverter A2BC_POSITIVE_INFINITY = new PreparedASCIIToBinaryBuffer(Double.POSITIVE_INFINITY); ! static final ASCIIToBinaryConverter A2BC_NEGATIVE_INFINITY = new PreparedASCIIToBinaryBuffer(Double.NEGATIVE_INFINITY); ! static final ASCIIToBinaryConverter A2BC_NOT_A_NUMBER = new PreparedASCIIToBinaryBuffer(Double.NaN); ! static final ASCIIToBinaryConverter A2BC_POSITIVE_ZERO = new PreparedASCIIToBinaryBuffer(0.0d); ! static final ASCIIToBinaryConverter A2BC_NEGATIVE_ZERO = new PreparedASCIIToBinaryBuffer(-0.0d); /** * A buffered implementation of <code>ASCIIToBinaryConverter</code>. */ static class ASCIIToBinaryBuffer implements ASCIIToBinaryConverter { boolean isNegative; int decExponent; char digits[]; int nDigits; - int roundDir = 0; // set by doubleValue ASCIIToBinaryBuffer( boolean negSign, int decExponent, char[] digits, int n) { this.isNegative = negSign; this.decExponent = decExponent; this.digits = digits; this.nDigits = n; } ! @Override ! public double doubleValue() { ! return doubleValue(false); ! } ! ! /** ! * Computes a number that is the ULP of the given value, ! * for purposes of addition/subtraction. Generally easy. ! * More difficult if subtracting and the argument ! * is a normalized a power of 2, as the ULP changes at these points. ! */ ! private static double ulp(double dval, boolean subtracting) { ! 
long lbits = Double.doubleToLongBits(dval) & ~DoubleConsts.SIGN_BIT_MASK; ! int binexp = (int) (lbits >>> EXP_SHIFT); ! double ulpval; ! if (subtracting && (binexp >= EXP_SHIFT) && ((lbits & DoubleConsts.SIGNIF_BIT_MASK) == 0L)) { ! // for subtraction from normalized, powers of 2, ! // use next-smaller exponent ! binexp -= 1; ! } ! if (binexp > EXP_SHIFT) { ! ulpval = Double.longBitsToDouble(((long) (binexp - EXP_SHIFT)) << EXP_SHIFT); ! } else if (binexp == 0) { ! ulpval = Double.MIN_VALUE; ! } else { ! ulpval = Double.longBitsToDouble(1L << (binexp - 1)); ! } ! if (subtracting) { ! ulpval = -ulpval; ! } ! ! return ulpval; ! } ! ! /** * Takes a FloatingDecimal, which we presumably just scanned in, * and finds out what its value is, as a double. * * AS A SIDE EFFECT, SET roundDir TO INDICATE PREFERRED * ROUNDING DIRECTION in case the result is really destined * for a single-precision float. */ ! private strictfp double doubleValue(boolean mustSetRoundDir) { int kDigits = Math.min(nDigits, MAX_DECIMAL_DIGITS + 1); - long lValue; - double dValue; - double rValue; - - if (mustSetRoundDir) { - roundDir = 0; - } // // convert the lead kDigits to a long integer. // // (special performance hack: start to do it using int) int iValue = (int) digits[0] - (int) '0'; int iDigits = Math.min(kDigits, INT_DECIMAL_DIGITS); for (int i = 1; i < iDigits; i++) { iValue = iValue * 10 + (int) digits[i] - (int) '0'; } ! lValue = (long) iValue; for (int i = iDigits; i < kDigits; i++) { lValue = lValue * 10L + (long) ((int) digits[i] - (int) '0'); } ! dValue = (double) lValue; int exp = decExponent - kDigits; // // lValue now contains a long integer with the value of // the first kDigits digits of the number. // dValue contains the (double) of the same. --- 1002,1077 ---- /** * A <code>ASCIIToBinaryConverter</code> container for a <code>double</code>. */ static class PreparedASCIIToBinaryBuffer implements ASCIIToBinaryConverter { final private double doubleVal; ! 
final private float floatVal; ! public PreparedASCIIToBinaryBuffer(double doubleVal, float floatVal) { this.doubleVal = doubleVal; ! this.floatVal = floatVal; } @Override public double doubleValue() { return doubleVal; } @Override public float floatValue() { ! return floatVal; } } ! static final ASCIIToBinaryConverter A2BC_POSITIVE_INFINITY = new PreparedASCIIToBinaryBuffer(Double.POSITIVE_INFINITY, Float.POSITIVE_INFINITY); ! static final ASCIIToBinaryConverter A2BC_NEGATIVE_INFINITY = new PreparedASCIIToBinaryBuffer(Double.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY); ! static final ASCIIToBinaryConverter A2BC_NOT_A_NUMBER = new PreparedASCIIToBinaryBuffer(Double.NaN, Float.NaN); ! static final ASCIIToBinaryConverter A2BC_POSITIVE_ZERO = new PreparedASCIIToBinaryBuffer(0.0d, 0.0f); ! static final ASCIIToBinaryConverter A2BC_NEGATIVE_ZERO = new PreparedASCIIToBinaryBuffer(-0.0d, -0.0f); /** * A buffered implementation of <code>ASCIIToBinaryConverter</code>. */ static class ASCIIToBinaryBuffer implements ASCIIToBinaryConverter { boolean isNegative; int decExponent; char digits[]; int nDigits; ASCIIToBinaryBuffer( boolean negSign, int decExponent, char[] digits, int n) { this.isNegative = negSign; this.decExponent = decExponent; this.digits = digits; this.nDigits = n; } ! /* * Takes a FloatingDecimal, which we presumably just scanned in, * and finds out what its value is, as a double. * * AS A SIDE EFFECT, SET roundDir TO INDICATE PREFERRED * ROUNDING DIRECTION in case the result is really destined * for a single-precision float. */ ! @Override ! public double doubleValue() { int kDigits = Math.min(nDigits, MAX_DECIMAL_DIGITS + 1); // // convert the lead kDigits to a long integer. // // (special performance hack: start to do it using int) int iValue = (int) digits[0] - (int) '0'; int iDigits = Math.min(kDigits, INT_DECIMAL_DIGITS); for (int i = 1; i < iDigits; i++) { iValue = iValue * 10 + (int) digits[i] - (int) '0'; } ! 
long lValue = (long) iValue; for (int i = iDigits; i < kDigits; i++) { lValue = lValue * 10L + (long) ((int) digits[i] - (int) '0'); } ! double dValue = (double) lValue; int exp = decExponent - kDigits; // // lValue now contains a long integer with the value of // the first kDigits digits of the number. // dValue contains the (double) of the same.
*** 1138,1154 **** if (exp <= MAX_SMALL_TEN) { // // Can get the answer with one operation, // thus one roundoff. // ! rValue = dValue * SMALL_10_POW[exp]; ! if (mustSetRoundDir) { ! double tValue = rValue / SMALL_10_POW[exp]; ! roundDir = (tValue == dValue) ? 0 ! : (tValue < dValue) ? 1 ! : -1; ! } return (isNegative) ? -rValue : rValue; } int slop = MAX_DECIMAL_DIGITS - kDigits; if (exp <= MAX_SMALL_TEN + slop) { // --- 1095,1105 ---- if (exp <= MAX_SMALL_TEN) { // // Can get the answer with one operation, // thus one roundoff. // ! double rValue = dValue * SMALL_10_POW[exp]; return (isNegative) ? -rValue : rValue; } int slop = MAX_DECIMAL_DIGITS - kDigits; if (exp <= MAX_SMALL_TEN + slop) { //
*** 1156,1190 **** // and it is still "small" and exact. // Then we can multiply by 10^(exp-slop) // with one rounding. // dValue *= SMALL_10_POW[slop]; ! rValue = dValue * SMALL_10_POW[exp - slop]; ! ! if (mustSetRoundDir) { ! double tValue = rValue / SMALL_10_POW[exp - slop]; ! roundDir = (tValue == dValue) ? 0 ! : (tValue < dValue) ? 1 ! : -1; ! } return (isNegative) ? -rValue : rValue; } // // Else we have a hard case with a positive exp. // } else { if (exp >= -MAX_SMALL_TEN) { // // Can get the answer in one division. // ! rValue = dValue / SMALL_10_POW[-exp]; ! if (mustSetRoundDir) { ! double tValue = rValue * SMALL_10_POW[-exp]; ! roundDir = (tValue == dValue) ? 0 ! : (tValue < dValue) ? 1 ! : -1; ! } return (isNegative) ? -rValue : rValue; } // // Else we have a hard case with a negative exp. // --- 1107,1128 ---- // and it is still "small" and exact. // Then we can multiply by 10^(exp-slop) // with one rounding. // dValue *= SMALL_10_POW[slop]; ! double rValue = dValue * SMALL_10_POW[exp - slop]; return (isNegative) ? -rValue : rValue; } // // Else we have a hard case with a positive exp. // } else { if (exp >= -MAX_SMALL_TEN) { // // Can get the answer in one division. // ! double rValue = dValue / SMALL_10_POW[-exp]; return (isNegative) ? -rValue : rValue; } // // Else we have a hard case with a negative exp. //
*** 1301,1326 **** // The hard part is adjusting it, by comparison // with FDBigInteger arithmetic. // Formulate the EXACT big-number result as // bigD0 * 10^exp // FDBigInteger bigD0 = new FDBigInteger(lValue, digits, kDigits, nDigits); exp = decExponent - nDigits; final int B5 = Math.max(0, -exp); // powers of 5 in bigB, value is not modified inside correctionLoop final int D5 = Math.max(0, exp); // powers of 5 in bigD, value is not modified inside correctionLoop bigD0 = bigD0.multByPow52(D5, 0); bigD0.makeImmutable(); // prevent bigD0 modification inside correctionLoop FDBigInteger bigD = null; int prevD2 = 0; correctionLoop: while (true) { ! // here dValue can't be NaN, Infinity or zero ! long bigBbits = Double.doubleToRawLongBits(dValue) & ~DoubleConsts.SIGN_BIT_MASK; ! int binexp = (int) (bigBbits >>> EXP_SHIFT); ! bigBbits &= DoubleConsts.SIGNIF_BIT_MASK; if (binexp > 0) { bigBbits |= FRACT_HOB; } else { // Normalize denormalized numbers. assert bigBbits != 0L : bigBbits; // doubleToBigInt(0.0) int leadingZeros = Long.numberOfLeadingZeros(bigBbits); --- 1239,1268 ---- // The hard part is adjusting it, by comparison // with FDBigInteger arithmetic. // Formulate the EXACT big-number result as // bigD0 * 10^exp // + if (nDigits > MAX_NDIGITS) { + nDigits = MAX_NDIGITS + 1; + digits[MAX_NDIGITS] = '1'; + } FDBigInteger bigD0 = new FDBigInteger(lValue, digits, kDigits, nDigits); exp = decExponent - nDigits; + long ieeeBits = Double.doubleToRawLongBits(dValue); // IEEE-754 bits of double candidate final int B5 = Math.max(0, -exp); // powers of 5 in bigB, value is not modified inside correctionLoop final int D5 = Math.max(0, exp); // powers of 5 in bigD, value is not modified inside correctionLoop bigD0 = bigD0.multByPow52(D5, 0); bigD0.makeImmutable(); // prevent bigD0 modification inside correctionLoop FDBigInteger bigD = null; int prevD2 = 0; correctionLoop: while (true) { ! // here ieeeBits can't be NaN, Infinity or zero ! 
int binexp = (int) (ieeeBits >>> EXP_SHIFT); ! long bigBbits = ieeeBits & DoubleConsts.SIGNIF_BIT_MASK; if (binexp > 0) { bigBbits |= FRACT_HOB; } else { // Normalize denormalized numbers. assert bigBbits != 0L : bigBbits; // doubleToBigInt(0.0) int leadingZeros = Long.numberOfLeadingZeros(bigBbits);
*** 1356,1366 **** // halfUlp is still an integer. int hulpbias; if (binexp <= -DoubleConsts.EXP_BIAS) { // This is going to be a denormalized number // (if not actually zero). ! // half an ULP is at 2^-(expBias+EXP_SHIFT+1) hulpbias = binexp + lowOrderZeros + DoubleConsts.EXP_BIAS; } else { hulpbias = 1 + lowOrderZeros; } B2 += hulpbias; --- 1298,1308 ---- // halfUlp is still an integer. int hulpbias; if (binexp <= -DoubleConsts.EXP_BIAS) { // This is going to be a denormalized number // (if not actually zero). ! // half an ULP is at 2^-(DoubleConsts.EXP_BIAS+EXP_SHIFT+1) hulpbias = binexp + lowOrderZeros + DoubleConsts.EXP_BIAS; } else { hulpbias = 1 + lowOrderZeros; } B2 += hulpbias;
*** 1420,1456 **** } cmpResult = diff.cmpPow52(B5, Ulp2); if ((cmpResult) < 0) { // difference is small. // this is close enough - if (mustSetRoundDir) { - roundDir = overvalue ? -1 : 1; - } break correctionLoop; } else if (cmpResult == 0) { // difference is exactly half an ULP // round to some other value maybe, then finish ! dValue += 0.5 * ulp(dValue, overvalue); ! // should check for bigIntNBits == 1 here?? ! if (mustSetRoundDir) { ! roundDir = overvalue ? -1 : 1; } break correctionLoop; } else { // difference is non-trivial. // could scale addend by ratio of difference to // halfUlp here, if we bothered to compute that difference. // Most of the time ( I hope ) it is about 1 anyway. ! dValue += ulp(dValue, overvalue); ! if (dValue == 0.0 || dValue == Double.POSITIVE_INFINITY) { break correctionLoop; // oops. Fell off end of range. } continue; // try again. } } ! return (isNegative) ? -dValue : dValue; } /** * Takes a FloatingDecimal, which we presumably just scanned in, * and finds out what its value is, as a float. --- 1362,1396 ---- } cmpResult = diff.cmpPow52(B5, Ulp2); if ((cmpResult) < 0) { // difference is small. // this is close enough break correctionLoop; } else if (cmpResult == 0) { // difference is exactly half an ULP // round to some other value maybe, then finish ! if ((ieeeBits & 1) != 0) { // half ties to even ! ieeeBits += overvalue ? -1 : 1; // nextDown or nextUp } break correctionLoop; } else { // difference is non-trivial. // could scale addend by ratio of difference to // halfUlp here, if we bothered to compute that difference. // Most of the time ( I hope ) it is about 1 anyway. ! ieeeBits += overvalue ? -1 : 1; // nextDown or nextUp ! if (ieeeBits == 0 || ieeeBits == DoubleConsts.EXP_BIT_MASK) { // 0.0 or Double.POSITIVE_INFINITY break correctionLoop; // oops. Fell off end of range. } continue; // try again. } } ! if (isNegative) { ! ieeeBits |= DoubleConsts.SIGN_BIT_MASK; ! } ! 
return Double.longBitsToDouble(ieeeBits); } /** * Takes a FloatingDecimal, which we presumably just scanned in, * and finds out what its value is, as a float.
*** 1459,1480 **** * to double has one rounding error, and the conversion of that double * to a float has another rounding error, IN THE WRONG DIRECTION, * ( because of the preference to a zero low-order bit ). */ @Override ! public strictfp float floatValue() { int kDigits = Math.min(nDigits, SINGLE_MAX_DECIMAL_DIGITS + 1); - int iValue; - float fValue; // // convert the lead kDigits to an integer. // ! iValue = (int) digits[0] - (int) '0'; for (int i = 1; i < kDigits; i++) { iValue = iValue * 10 + (int) digits[i] - (int) '0'; } ! fValue = (float) iValue; int exp = decExponent - kDigits; // // iValue now contains an integer with the value of // the first kDigits digits of the number. // fValue contains the (float) of the same. --- 1399,1418 ---- * to double has one rounding error, and the conversion of that double * to a float has another rounding error, IN THE WRONG DIRECTION, * ( because of the preference to a zero low-order bit ). */ @Override ! public float floatValue() { int kDigits = Math.min(nDigits, SINGLE_MAX_DECIMAL_DIGITS + 1); // // convert the lead kDigits to an integer. // ! int iValue = (int) digits[0] - (int) '0'; for (int i = 1; i < kDigits; i++) { iValue = iValue * 10 + (int) digits[i] - (int) '0'; } ! float fValue = (float) iValue; int exp = decExponent - kDigits; // // iValue now contains an integer with the value of // the first kDigits digits of the number. // fValue contains the (float) of the same.
*** 1503,1513 **** return (isNegative) ? -fValue : fValue; } int slop = SINGLE_MAX_DECIMAL_DIGITS - kDigits; if (exp <= SINGLE_MAX_SMALL_TEN + slop) { // ! // We can multiply dValue by 10^(slop) // and it is still "small" and exact. // Then we can multiply by 10^(exp-slop) // with one rounding. // fValue *= SINGLE_SMALL_10_POW[slop]; --- 1441,1451 ---- return (isNegative) ? -fValue : fValue; } int slop = SINGLE_MAX_DECIMAL_DIGITS - kDigits; if (exp <= SINGLE_MAX_SMALL_TEN + slop) { // ! // We can multiply fValue by 10^(slop) // and it is still "small" and exact. // Then we can multiply by 10^(exp-slop) // with one rounding. // fValue *= SINGLE_SMALL_10_POW[slop];
*** 1553,1594 **** // // Harder cases: // The sum of digits plus exponent is greater than // what we think we can do with one error. // ! // Start by weeding out obviously out-of-range ! // results, then convert to double and go to ! // common hard-case code. // if (decExponent > SINGLE_MAX_DECIMAL_EXPONENT + 1) { // // Lets face it. This is going to be // Infinity. Cut to the chase. // return (isNegative) ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY; ! } else if (decExponent < SINGLE_MIN_DECIMAL_EXPONENT - 1) { // // Lets face it. This is going to be // zero. Cut to the chase. // return (isNegative) ? -0.0f : 0.0f; } // ! // Here, we do 'way too much work, but throwing away ! // our partial results, and going and doing the whole ! // thing as double, then throwing away half the bits that computes ! // when we convert back to float. ! // ! // The alternative is to reproduce the whole multiple-precision ! // algorithm for float precision, or to try to parameterize it ! // for common usage. The former will take about 400 lines of code, ! // and the latter I tried without success. Thus the semi-hack ! // answer here. // ! double dValue = doubleValue(true); ! return stickyRound(dValue, roundDir); } /** * All the positive powers of 10 that can be --- 1491,1702 ---- // // Harder cases: // The sum of digits plus exponent is greater than // what we think we can do with one error. // ! // Start by approximating the right answer by, ! // naively, scaling by powers of 10. ! // Scaling uses doubles to avoid overflow/underflow. // + double dValue = fValue; + if (exp > 0) { if (decExponent > SINGLE_MAX_DECIMAL_EXPONENT + 1) { // // Lets face it. This is going to be // Infinity. Cut to the chase. // return (isNegative) ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY; ! } ! if ((exp & 15) != 0) { ! dValue *= SMALL_10_POW[exp & 15]; ! } ! if ((exp >>= 4) != 0) { ! int j; ! for (j = 0; exp > 0; j++, exp >>= 1) { ! if ((exp & 1) != 0) { ! dValue *= BIG_10_POW[j]; ! } ! } ! 
} ! } else if (exp < 0) { ! exp = -exp; ! if (decExponent < SINGLE_MIN_DECIMAL_EXPONENT - 1) { // // Lets face it. This is going to be // zero. Cut to the chase. // return (isNegative) ? -0.0f : 0.0f; } + if ((exp & 15) != 0) { + dValue /= SMALL_10_POW[exp & 15]; + } + if ((exp >>= 4) != 0) { + int j; + for (j = 0; exp > 0; j++, exp >>= 1) { + if ((exp & 1) != 0) { + dValue *= TINY_10_POW[j]; + } + } + } + } + fValue = Math.max(Float.MIN_VALUE, Math.min(Float.MAX_VALUE, (float) dValue)); // ! // fValue is now approximately the result. ! // The hard part is adjusting it, by comparison ! // with FDBigInteger arithmetic. ! // Formulate the EXACT big-number result as ! // bigD0 * 10^exp // ! if (nDigits > SINGLE_MAX_NDIGITS) { ! nDigits = SINGLE_MAX_NDIGITS + 1; ! digits[SINGLE_MAX_NDIGITS] = '1'; ! } ! FDBigInteger bigD0 = new FDBigInteger(iValue, digits, kDigits, nDigits); ! exp = decExponent - nDigits; ! ! int ieeeBits = Float.floatToRawIntBits(fValue); // IEEE-754 bits of float candidate ! final int B5 = Math.max(0, -exp); // powers of 5 in bigB, value is not modified inside correctionLoop ! final int D5 = Math.max(0, exp); // powers of 5 in bigD, value is not modified inside correctionLoop ! bigD0 = bigD0.multByPow52(D5, 0); ! bigD0.makeImmutable(); // prevent bigD0 modification inside correctionLoop ! FDBigInteger bigD = null; ! int prevD2 = 0; ! ! correctionLoop: ! while (true) { ! // here ieeeBits can't be NaN, Infinity or zero ! int binexp = ieeeBits >>> SINGLE_EXP_SHIFT; ! int bigBbits = ieeeBits & FloatConsts.SIGNIF_BIT_MASK; ! if (binexp > 0) { ! bigBbits |= SINGLE_FRACT_HOB; ! } else { // Normalize denormalized numbers. ! assert bigBbits != 0 : bigBbits; // floatToBigInt(0.0) ! int leadingZeros = Integer.numberOfLeadingZeros(bigBbits); ! int shift = leadingZeros - (31 - SINGLE_EXP_SHIFT); ! bigBbits <<= shift; ! binexp = 1 - shift; ! } ! binexp -= FloatConsts.EXP_BIAS; ! int lowOrderZeros = Integer.numberOfTrailingZeros(bigBbits); ! 
bigBbits >>>= lowOrderZeros; ! final int bigIntExp = binexp - SINGLE_EXP_SHIFT + lowOrderZeros; ! final int bigIntNBits = SINGLE_EXP_SHIFT + 1 - lowOrderZeros; ! ! // ! // Scale bigD, bigB appropriately for ! // big-integer operations. ! // Naively, we multiply by powers of ten ! // and powers of two. What we actually do ! // is keep track of the powers of 5 and ! // powers of 2 we would use, then factor out ! // common divisors before doing the work. ! // ! int B2 = B5; // powers of 2 in bigB ! int D2 = D5; // powers of 2 in bigD ! int Ulp2; // powers of 2 in halfUlp. ! if (bigIntExp >= 0) { ! B2 += bigIntExp; ! } else { ! D2 -= bigIntExp; ! } ! Ulp2 = B2; ! // shift bigB and bigD left by a number s. t. ! // halfUlp is still an integer. ! int hulpbias; ! if (binexp <= -FloatConsts.EXP_BIAS) { ! // This is going to be a denormalized number ! // (if not actually zero). ! // half an ULP is at 2^-(FloatConsts.EXP_BIAS+SINGLE_EXP_SHIFT+1) ! hulpbias = binexp + lowOrderZeros + FloatConsts.EXP_BIAS; ! } else { ! hulpbias = 1 + lowOrderZeros; ! } ! B2 += hulpbias; ! D2 += hulpbias; ! // if there are common factors of 2, we might just as well ! // factor them out, as they add nothing useful. ! int common2 = Math.min(B2, Math.min(D2, Ulp2)); ! B2 -= common2; ! D2 -= common2; ! Ulp2 -= common2; ! // do multiplications by powers of 5 and 2 ! FDBigInteger bigB = FDBigInteger.valueOfMulPow52(bigBbits, B5, B2); ! if (bigD == null || prevD2 != D2) { ! bigD = bigD0.leftShift(D2); ! prevD2 = D2; ! } ! // ! // to recap: ! // bigB is the scaled-big-int version of our floating-point ! // candidate. ! // bigD is the scaled-big-int version of the exact value ! // as we understand it. ! // halfUlp is 1/2 an ulp of bigB, except for special cases ! // of exact powers of 2 ! // ! // the plan is to compare bigB with bigD, and if the difference ! // is less than halfUlp, then we're satisfied. Otherwise, ! // use the ratio of difference to halfUlp to calculate a fudge ! 
// factor to add to the floating value, then go 'round again. ! // ! FDBigInteger diff; ! int cmpResult; ! boolean overvalue; ! if ((cmpResult = bigB.cmp(bigD)) > 0) { ! overvalue = true; // our candidate is too big. ! diff = bigB.leftInplaceSub(bigD); // bigB is not used further - reuse ! if ((bigIntNBits == 1) && (bigIntExp > -FloatConsts.EXP_BIAS + 1)) { ! // candidate is a normalized exact power of 2 and ! // is too big (larger than Float.MIN_NORMAL). We will be subtracting. ! // For our purposes, ulp is the ulp of the ! // next smaller range. ! Ulp2 -= 1; ! if (Ulp2 < 0) { ! // rats. Cannot de-scale ulp this far. ! // must scale diff in other direction. ! Ulp2 = 0; ! diff = diff.leftShift(1); ! } ! } ! } else if (cmpResult < 0) { ! overvalue = false; // our candidate is too small. ! diff = bigD.rightInplaceSub(bigB); // bigB is not used further - reuse ! } else { ! // the candidate is exactly right! ! // this happens with surprising frequency ! break correctionLoop; ! } ! cmpResult = diff.cmpPow52(B5, Ulp2); ! if ((cmpResult) < 0) { ! // difference is small. ! // this is close enough ! break correctionLoop; ! } else if (cmpResult == 0) { ! // difference is exactly half an ULP ! // round to some other value maybe, then finish ! if ((ieeeBits & 1) != 0) { // half ties to even ! ieeeBits += overvalue ? -1 : 1; // nextDown or nextUp ! } ! break correctionLoop; ! } else { ! // difference is non-trivial. ! // could scale addend by ratio of difference to ! // halfUlp here, if we bothered to compute that difference. ! // Most of the time ( I hope ) it is about 1 anyway. ! ieeeBits += overvalue ? -1 : 1; // nextDown or nextUp ! if (ieeeBits == 0 || ieeeBits == FloatConsts.EXP_BIT_MASK) { // 0.0 or Float.POSITIVE_INFINITY ! break correctionLoop; // oops. Fell off end of range. ! } ! continue; // try again. ! } ! ! } ! if (isNegative) { ! ieeeBits |= FloatConsts.SIGN_BIT_MASK; ! } ! return Float.intBitsToFloat(ieeeBits); } /** * All the positive powers of 10 that can be
*** 1933,1968 **** return new ASCIIToBinaryBuffer(isNegative, decExp, digits, nDigits); } catch ( StringIndexOutOfBoundsException e ){ } throw new NumberFormatException("For input string: \"" + in + "\""); } - /** - * Rounds a double to a float. - * In addition to the fraction bits of the double, - * look at the class instance variable roundDir, - * which should help us avoid double-rounding error. - * roundDir was set in hardValueOf if the estimate was - * close enough, but not exact. It tells us which direction - * of rounding is preferred. - */ - static float stickyRound( double dval, int roundDirection ){ - if(roundDirection!=0) { - long lbits = Double.doubleToRawLongBits( dval ); - long binexp = lbits & DoubleConsts.EXP_BIT_MASK; - if ( binexp == 0L || binexp == DoubleConsts.EXP_BIT_MASK ){ - // what we have here is special. - // don't worry, the right thing will happen. - return (float) dval; - } - lbits += (long)roundDirection; // hack-o-matic. - return (float)Double.longBitsToDouble( lbits ); - } else { - return (float)dval; - } - } - - private static class HexFloatPattern { /** * Grammar is compatible with hexadecimal floating-point constants * described in section 6.4.4.2 of the C99 specification. */ --- 2041,2050 ----
*** 2280,2289 **** --- 2362,2404 ---- } // else all of string was seen, round and sticky are // correct as false. + // Float calculations + int floatBits = isNegative ? FloatConsts.SIGN_BIT_MASK : 0; + if (exponent >= FloatConsts.MIN_EXPONENT) { + if (exponent > FloatConsts.MAX_EXPONENT) { + // Float.POSITIVE_INFINITY + floatBits |= FloatConsts.EXP_BIT_MASK; + } else { + int threshShift = DoubleConsts.SIGNIFICAND_WIDTH - FloatConsts.SIGNIFICAND_WIDTH - 1; + boolean floatSticky = (significand & ((1L << threshShift) - 1)) != 0 || round || sticky; + int iValue = (int) (significand >>> threshShift); + if ((iValue & 3) != 1 || floatSticky) { + iValue++; + } + floatBits |= (((((int) exponent) + (FloatConsts.EXP_BIAS - 1))) << SINGLE_EXP_SHIFT) + (iValue >> 1); + } + } else { + if (exponent < FloatConsts.MIN_SUB_EXPONENT - 1) { + // 0 + } else { + // exponent == -127 ==> threshShift = 53 - 2 + (-149) - (-127) = 53 - 24 + int threshShift = (int) ((DoubleConsts.SIGNIFICAND_WIDTH - 2 + FloatConsts.MIN_SUB_EXPONENT) - exponent); + assert threshShift >= DoubleConsts.SIGNIFICAND_WIDTH - FloatConsts.SIGNIFICAND_WIDTH; + assert threshShift < DoubleConsts.SIGNIFICAND_WIDTH; + boolean floatSticky = (significand & ((1L << threshShift) - 1)) != 0 || round || sticky; + int iValue = (int) (significand >>> threshShift); + if ((iValue & 3) != 1 || floatSticky) { + iValue++; + } + floatBits |= iValue >> 1; + } + } + float fValue = Float.intBitsToFloat(floatBits); + // Check for overflow and update exponent accordingly. if (exponent > DoubleConsts.MAX_EXPONENT) { // Infinite result // overflow to properly signed infinity return isNegative ? A2BC_NEGATIVE_INFINITY : A2BC_POSITIVE_INFINITY; } else { // Finite return value
*** 2388,2478 **** double value = isNegative ? Double.longBitsToDouble(significand | DoubleConsts.SIGN_BIT_MASK) : Double.longBitsToDouble(significand ); ! int roundDir = 0; ! // ! // Set roundingDir variable field of fd properly so ! // that the input string can be properly rounded to a ! // float value. There are two cases to consider: ! // ! // 1. rounding to double discards sticky bit ! // information that would change the result of a float ! // rounding (near halfway case between two floats) ! // ! // 2. rounding to double rounds up when rounding up ! // would not occur when rounding to float. ! // ! // For former case only needs to be considered when ! // the bits rounded away when casting to float are all ! // zero; otherwise, float round bit is properly set ! // and sticky will already be true. ! // ! // The lower exponent bound for the code below is the ! // minimum (normalized) subnormal exponent - 1 since a ! // value with that exponent can round up to the ! // minimum subnormal value and the sticky bit ! // information must be preserved (i.e. case 1). ! // ! if ((exponent >= FloatConsts.MIN_SUB_EXPONENT - 1) && ! (exponent <= FloatConsts.MAX_EXPONENT)) { ! // Outside above exponent range, the float value ! // will be zero or infinity. ! ! // ! // If the low-order 28 bits of a rounded double ! // significand are 0, the double could be a ! // half-way case for a rounding to float. If the ! // double value is a half-way case, the double ! // significand may have to be modified to round ! // the the right float value (see the stickyRound ! // method). If the rounding to double has lost ! // what would be float sticky bit information, the ! // double significand must be incremented. If the ! // double value's significand was itself ! // incremented, the float value may end up too ! // large so the increment should be undone. ! // ! if ((significand & 0xfffffffL) == 0x0L) { ! // For negative values, the sign of the ! 
// roundDir is the same as for positive values ! // since adding 1 increasing the significand's ! // magnitude and subtracting 1 decreases the ! // significand's magnitude. If neither round ! // nor sticky is true, the double value is ! // exact and no adjustment is required for a ! // proper float rounding. ! if (round || sticky) { ! if (leastZero) { // prerounding lsb is 0 ! // If round and sticky were both true, ! // and the least significant ! // significand bit were 0, the rounded ! // significand would not have its ! // low-order bits be zero. Therefore, ! // we only need to adjust the ! // significand if round XOR sticky is ! // true. ! if (round ^ sticky) { ! roundDir = 1; ! } ! } else { // prerounding lsb is 1 ! // If the prerounding lsb is 1 and the ! // resulting significand has its ! // low-order bits zero, the significand ! // was incremented. Here, we undo the ! // increment, which will ensure the ! // right guard and sticky bits for the ! // float rounding. ! if (round) { ! roundDir = -1; ! } ! } ! } ! } ! } ! return new PreparedASCIIToBinaryBuffer(value,roundDir); } } } /** --- 2503,2513 ---- double value = isNegative ? Double.longBitsToDouble(significand | DoubleConsts.SIGN_BIT_MASK) : Double.longBitsToDouble(significand ); ! return new PreparedASCIIToBinaryBuffer(value, fValue); } } } /**