## src/share/classes/sun/misc/FloatingDecimal.java

Print this page
```rev 7487 : 7192954: Fix Float.parseFloat to round correctly and preserve monotonicity.
4396272: Parsing doubles fails to follow IEEE for largest decimal that should yield 0
7039391: Use Math.ulp in FloatingDecimal
Summary: Correct rounding and monotonicity problems in floats and doubles
Reviewed-by: martin
Contributed-by: Dmitry Nadezhin <dmitry.nadezhin@oracle.com>, Louis Wasserman <lowasser@google.com>```
 ``` `````` 32 * A class for converting between ASCII and decimal representations of a single 33 * or double precision floating point number. Most conversions are provided via 34 * static convenience methods, although a BinaryToASCIIConverter 35 * instance may be obtained and reused. 36 */ 37 public class FloatingDecimal{ 38 // 39 // Constants of the implementation; 40 // most are IEEE-754 related. 41 // (There are more really boring constants at the end.) 42 // 43 static final int EXP_SHIFT = DoubleConsts.SIGNIFICAND_WIDTH - 1; 44 static final long FRACT_HOB = ( 1L<String. 63 * 64 * @param d The double precision value. 65 * @return The value converted to a String. 66 */ 67 public static String toJavaFormatString(double d) { 68 return getBinaryToASCIIConverter(d).toJavaFormatString(); 69 } 70 71 /** 72 * Converts a single precision floating point value to a String. 73 * 74 * @param f The single precision value. 75 * @return The value converted to a String. 76 */ 77 public static String toJavaFormatString(float f) { `````` 985 } 986 987 /** 988 * A converter which can process an ASCII String representation 989 * of a single or double precision floating point value into a 990 * float or a double. 991 */ 992 interface ASCIIToBinaryConverter { 993 994 double doubleValue(); 995 996 float floatValue(); 997 998 } 999 1000 /** 1001 * A ASCIIToBinaryConverter container for a double. 
1002 */ 1003 static class PreparedASCIIToBinaryBuffer implements ASCIIToBinaryConverter { 1004 /** The double result, returned unchanged by doubleValue(). */ final private double doubleVal; 1005 /** Rounding hint forwarded to stickyRound() by floatValue(); stays 0 unless the two-argument constructor sets it. */ private int roundDir = 0; 1006 1007 /** Wraps doubleVal and leaves roundDir at 0. */ public PreparedASCIIToBinaryBuffer(double doubleVal) { 1008 this.doubleVal = doubleVal; 1009 } 1010 1011 /** Wraps doubleVal together with the roundDir hint that floatValue() forwards to stickyRound(). */ public PreparedASCIIToBinaryBuffer(double doubleVal, int roundDir) { 1012 this.doubleVal = doubleVal; 1013 this.roundDir = roundDir; 1014 } 1015 1016 /** Returns the stored double. */ @Override 1017 public double doubleValue() { 1018 return doubleVal; 1019 } 1020 1021 /** Returns stickyRound(doubleVal, roundDir); the float is derived on each call rather than stored. */ @Override 1022 public float floatValue() { 1023 return stickyRound(doubleVal,roundDir); 1024 } 1025 } 1026 1027 /* Shared converters for the infinities, NaN and the signed zeros; each uses the single-argument constructor, so its roundDir is 0. */ static final ASCIIToBinaryConverter A2BC_POSITIVE_INFINITY = new PreparedASCIIToBinaryBuffer(Double.POSITIVE_INFINITY); 1028 static final ASCIIToBinaryConverter A2BC_NEGATIVE_INFINITY = new PreparedASCIIToBinaryBuffer(Double.NEGATIVE_INFINITY); 1029 static final ASCIIToBinaryConverter A2BC_NOT_A_NUMBER = new PreparedASCIIToBinaryBuffer(Double.NaN); 1030 static final ASCIIToBinaryConverter A2BC_POSITIVE_ZERO = new PreparedASCIIToBinaryBuffer(0.0d); 1031 static final ASCIIToBinaryConverter A2BC_NEGATIVE_ZERO = new PreparedASCIIToBinaryBuffer(-0.0d); 1032 1033 /** 1034 * A buffered implementation of ASCIIToBinaryConverter. 1035 */ 1036 static class ASCIIToBinaryBuffer implements ASCIIToBinaryConverter { 1037 boolean isNegative; 1038 int decExponent; 1039 char digits[]; 1040 int nDigits; 1041 int roundDir = 0; // set by doubleValue 1042 1043 ASCIIToBinaryBuffer( boolean negSign, int decExponent, char[] digits, int n) 1044 { 1045 this.isNegative = negSign; 1046 this.decExponent = decExponent; 1047 this.digits = digits; 1048 this.nDigits = n; 1049 } 1050 1051 /** Computes the double without recording a rounding hint: delegates to doubleValue(false). */ @Override 1052 public double doubleValue() { 1053 return doubleValue(false); 1054 } 1055 1056 /** 1057 * Computes a number that is the ULP of the given value, 1058 * for purposes of addition/subtraction. Generally easy. 
1059 * More difficult if subtracting and the argument 1060 * is a normalized a power of 2, as the ULP changes at these points. 1061 */ 1062 private static double ulp(double dval, boolean subtracting) { /* dval: value whose step size is wanted. subtracting: true when the step is to be subtracted; the result is then returned negated so the caller can simply add it. */ 1063 /* Drop the sign bit so only the exponent and significand fields remain. */ long lbits = Double.doubleToLongBits(dval) & ~DoubleConsts.SIGN_BIT_MASK; 1064 /* Exponent field, shifted down to the low bits. */ int binexp = (int) (lbits >>> EXP_SHIFT); 1065 double ulpval; 1066 /* NOTE(review): the power-of-two adjustment below is applied only when binexp >= EXP_SHIFT; powers of two with a smaller exponent field keep the unadjusted step - confirm this is intended. */ if (subtracting && (binexp >= EXP_SHIFT) && ((lbits & DoubleConsts.SIGNIF_BIT_MASK) == 0L)) { 1067 // for subtraction from normalized, powers of 2, 1068 // use next-smaller exponent 1069 binexp -= 1; 1070 } 1071 /* Exponent field above EXP_SHIFT: the result has a zero significand and an exponent field lowered by EXP_SHIFT. */ if (binexp > EXP_SHIFT) { 1072 ulpval = Double.longBitsToDouble(((long) (binexp - EXP_SHIFT)) << EXP_SHIFT); 1073 } else if (binexp == 0) { 1074 /* Zero exponent field: the step is the smallest positive double. */ ulpval = Double.MIN_VALUE; 1075 } else { 1076 /* Otherwise set a single bit below the exponent field (the shift is at most EXP_SHIFT - 1). */ ulpval = Double.longBitsToDouble(1L << (binexp - 1)); 1077 } 1078 if (subtracting) { 1079 ulpval = -ulpval; 1080 } 1081 1082 return ulpval; 1083 } 1084 1085 /** 1086 * Takes a FloatingDecimal, which we presumably just scanned in, 1087 * and finds out what its value is, as a double. 1088 * 1089 * AS A SIDE EFFECT, SET roundDir TO INDICATE PREFERRED 1090 * ROUNDING DIRECTION in case the result is really destined 1091 * for a single-precision float. 1092 */ 1093 private strictfp double doubleValue(boolean mustSetRoundDir) { 1094 int kDigits = Math.min(nDigits, MAX_DECIMAL_DIGITS + 1); 1095 long lValue; 1096 double dValue; 1097 double rValue; 1098 1099 if (mustSetRoundDir) { 1100 roundDir = 0; 1101 } 1102 // 1103 // convert the lead kDigits to a long integer. 
1104 // 1105 // (special performance hack: start to do it using int) 1106 int iValue = (int) digits[0] - (int) '0'; 1107 int iDigits = Math.min(kDigits, INT_DECIMAL_DIGITS); 1108 for (int i = 1; i < iDigits; i++) { 1109 iValue = iValue * 10 + (int) digits[i] - (int) '0'; 1110 } 1111 lValue = (long) iValue; 1112 for (int i = iDigits; i < kDigits; i++) { 1113 lValue = lValue * 10L + (long) ((int) digits[i] - (int) '0'); 1114 } 1115 dValue = (double) lValue; 1116 int exp = decExponent - kDigits; 1117 // 1118 // lValue now contains a long integer with the value of 1119 // the first kDigits digits of the number. 1120 // dValue contains the (double) of the same. 1121 // 1122 1123 if (nDigits <= MAX_DECIMAL_DIGITS) { 1124 // 1125 // possibly an easy case. 1126 // We know that the digits can be represented 1127 // exactly. And if the exponent isn't too outrageous, 1128 // the whole thing can be done with one operation, 1129 // thus one rounding error. 1130 // Note that all our constructors trim all leading and 1131 // trailing zeros, so simple values (including zero) 1132 // will always end up here 1133 // 1134 if (exp == 0 || dValue == 0.0) { 1135 return (isNegative) ? -dValue : dValue; // small floating integer 1136 } 1137 else if (exp >= 0) { 1138 if (exp <= MAX_SMALL_TEN) { 1139 // 1140 // Can get the answer with one operation, 1141 // thus one roundoff. 1142 // 1143 rValue = dValue * SMALL_10_POW[exp]; 1144 if (mustSetRoundDir) { 1145 double tValue = rValue / SMALL_10_POW[exp]; 1146 roundDir = (tValue == dValue) ? 0 1147 : (tValue < dValue) ? 1 1148 : -1; 1149 } 1150 return (isNegative) ? -rValue : rValue; 1151 } 1152 int slop = MAX_DECIMAL_DIGITS - kDigits; 1153 if (exp <= MAX_SMALL_TEN + slop) { 1154 // 1155 // We can multiply dValue by 10^(slop) 1156 // and it is still "small" and exact. 1157 // Then we can multiply by 10^(exp-slop) 1158 // with one rounding. 
1159 // 1160 dValue *= SMALL_10_POW[slop]; 1161 rValue = dValue * SMALL_10_POW[exp - slop]; 1162 1163 if (mustSetRoundDir) { 1164 double tValue = rValue / SMALL_10_POW[exp - slop]; 1165 roundDir = (tValue == dValue) ? 0 1166 : (tValue < dValue) ? 1 1167 : -1; 1168 } 1169 return (isNegative) ? -rValue : rValue; 1170 } 1171 // 1172 // Else we have a hard case with a positive exp. 1173 // 1174 } else { 1175 if (exp >= -MAX_SMALL_TEN) { 1176 // 1177 // Can get the answer in one division. 1178 // 1179 rValue = dValue / SMALL_10_POW[-exp]; 1180 if (mustSetRoundDir) { 1181 double tValue = rValue * SMALL_10_POW[-exp]; 1182 roundDir = (tValue == dValue) ? 0 1183 : (tValue < dValue) ? 1 1184 : -1; 1185 } 1186 return (isNegative) ? -rValue : rValue; 1187 } 1188 // 1189 // Else we have a hard case with a negative exp. 1190 // 1191 } 1192 } 1193 1194 // 1195 // Harder cases: 1196 // The sum of digits plus exponent is greater than 1197 // what we think we can do with one error. 1198 // 1199 // Start by approximating the right answer by, 1200 // naively, scaling by powers of 10. 1201 // 1202 if (exp > 0) { 1203 if (decExponent > MAX_DECIMAL_EXPONENT + 1) { 1204 // 1205 // Lets face it. This is going to be ``````1286 // Double.MIN_VALUE ). 1287 // 1288 t = dValue * 2.0; 1289 t *= TINY_10_POW[j]; 1290 if (t == 0.0) { 1291 return (isNegative) ? -0.0 : 0.0; 1292 } 1293 t = Double.MIN_VALUE; 1294 } 1295 dValue = t; 1296 } 1297 } 1298 1299 // 1300 // dValue is now approximately the result. 1301 // The hard part is adjusting it, by comparison 1302 // with FDBigInteger arithmetic. 
1303 // Formulate the EXACT big-number result as 1304 // bigD0 * 10^exp 1305 // 1306 FDBigInteger bigD0 = new FDBigInteger(lValue, digits, kDigits, nDigits); 1307 exp = decExponent - nDigits; 1308 1309 final int B5 = Math.max(0, -exp); // powers of 5 in bigB, value is not modified inside correctionLoop 1310 final int D5 = Math.max(0, exp); // powers of 5 in bigD, value is not modified inside correctionLoop 1311 bigD0 = bigD0.multByPow52(D5, 0); 1312 bigD0.makeImmutable(); // prevent bigD0 modification inside correctionLoop 1313 FDBigInteger bigD = null; 1314 int prevD2 = 0; 1315 1316 correctionLoop: 1317 while (true) { 1318 // here dValue can't be NaN, Infinity or zero 1319 long bigBbits = Double.doubleToRawLongBits(dValue) & ~DoubleConsts.SIGN_BIT_MASK; 1320 int binexp = (int) (bigBbits >>> EXP_SHIFT); 1321 bigBbits &= DoubleConsts.SIGNIF_BIT_MASK; 1322 if (binexp > 0) { 1323 bigBbits |= FRACT_HOB; 1324 } else { // Normalize denormalized numbers. 1325 assert bigBbits != 0L : bigBbits; // doubleToBigInt(0.0) 1326 int leadingZeros = Long.numberOfLeadingZeros(bigBbits); 1327 int shift = leadingZeros - (63 - EXP_SHIFT); 1328 bigBbits <<= shift; 1329 binexp = 1 - shift; 1330 } 1331 binexp -= DoubleConsts.EXP_BIAS; 1332 int lowOrderZeros = Long.numberOfTrailingZeros(bigBbits); 1333 bigBbits >>>= lowOrderZeros; 1334 final int bigIntExp = binexp - EXP_SHIFT + lowOrderZeros; 1335 final int bigIntNBits = EXP_SHIFT + 1 - lowOrderZeros; 1336 1337 // 1338 // Scale bigD, bigB appropriately for 1339 // big-integer operations. 1340 // Naively, we multiply by powers of ten 1341 // and powers of two. What we actually do 1342 // is keep track of the powers of 5 and 1343 // powers of 2 we would use, then factor out 1344 // common divisors before doing the work. 1345 // 1346 int B2 = B5; // powers of 2 in bigB 1347 int D2 = D5; // powers of 2 in bigD 1348 int Ulp2; // powers of 2 in halfUlp. 
1349 if (bigIntExp >= 0) { 1350 B2 += bigIntExp; 1351 } else { 1352 D2 -= bigIntExp; 1353 } 1354 Ulp2 = B2; 1355 // shift bigB and bigD left by a number s. t. 1356 // halfUlp is still an integer. 1357 int hulpbias; 1358 if (binexp <= -DoubleConsts.EXP_BIAS) { 1359 // This is going to be a denormalized number 1360 // (if not actually zero). 1361 // half an ULP is at 2^-(expBias+EXP_SHIFT+1) 1362 hulpbias = binexp + lowOrderZeros + DoubleConsts.EXP_BIAS; 1363 } else { 1364 hulpbias = 1 + lowOrderZeros; 1365 } 1366 B2 += hulpbias; 1367 D2 += hulpbias; 1368 // if there are common factors of 2, we might just as well 1369 // factor them out, as they add nothing useful. 1370 int common2 = Math.min(B2, Math.min(D2, Ulp2)); 1371 B2 -= common2; 1372 D2 -= common2; 1373 Ulp2 -= common2; 1374 // do multiplications by powers of 5 and 2 1375 FDBigInteger bigB = FDBigInteger.valueOfMulPow52(bigBbits, B5, B2); 1376 if (bigD == null || prevD2 != D2) { 1377 bigD = bigD0.leftShift(D2); 1378 prevD2 = D2; 1379 } 1380 // 1381 // to recap: ``````1405 Ulp2 -= 1; 1406 if (Ulp2 < 0) { 1407 // rats. Cannot de-scale ulp this far. 1408 // must scale diff in other direction. 1409 Ulp2 = 0; 1410 diff = diff.leftShift(1); 1411 } 1412 } 1413 } else if (cmpResult < 0) { 1414 overvalue = false; // our candidate is too small. 1415 diff = bigD.rightInplaceSub(bigB); // bigB is not user further - reuse 1416 } else { 1417 // the candidate is exactly right! 1418 // this happens with surprising frequency 1419 break correctionLoop; 1420 } 1421 cmpResult = diff.cmpPow52(B5, Ulp2); 1422 if ((cmpResult) < 0) { 1423 // difference is small. 1424 // this is close enough 1425 if (mustSetRoundDir) { 1426 roundDir = overvalue ? -1 : 1; 1427 } 1428 break correctionLoop; 1429 } else if (cmpResult == 0) { 1430 // difference is exactly half an ULP 1431 // round to some other value maybe, then finish 1432 dValue += 0.5 * ulp(dValue, overvalue); 1433 // should check for bigIntNBits == 1 here?? 
1434 if (mustSetRoundDir) { 1435 roundDir = overvalue ? -1 : 1; 1436 } 1437 break correctionLoop; 1438 } else { 1439 // difference is non-trivial. 1440 // could scale addend by ratio of difference to 1441 // halfUlp here, if we bothered to compute that difference. 1442 // Most of the time ( I hope ) it is about 1 anyway. 1443 dValue += ulp(dValue, overvalue); 1444 if (dValue == 0.0 || dValue == Double.POSITIVE_INFINITY) { 1445 break correctionLoop; // oops. Fell off end of range. 1446 } 1447 continue; // try again. 1448 } 1449 1450 } 1451 return (isNegative) ? -dValue : dValue; 1452 } 1453 1454 /** 1455 * Takes a FloatingDecimal, which we presumably just scanned in, 1456 * and finds out what its value is, as a float. 1457 * This is distinct from doubleValue() to avoid the extremely 1458 * unlikely case of a double rounding error, wherein the conversion 1459 * to double has one rounding error, and the conversion of that double 1460 * to a float has another rounding error, IN THE WRONG DIRECTION, 1461 * ( because of the preference to a zero low-order bit ). 1462 */ 1463 @Override 1464 public strictfp float floatValue() { 1465 int kDigits = Math.min(nDigits, SINGLE_MAX_DECIMAL_DIGITS + 1); 1466 int iValue; 1467 float fValue; 1468 // 1469 // convert the lead kDigits to an integer. 1470 // 1471 iValue = (int) digits[0] - (int) '0'; 1472 for (int i = 1; i < kDigits; i++) { 1473 iValue = iValue * 10 + (int) digits[i] - (int) '0'; 1474 } 1475 fValue = (float) iValue; 1476 int exp = decExponent - kDigits; 1477 // 1478 // iValue now contains an integer with the value of 1479 // the first kDigits digits of the number. 1480 // fValue contains the (float) of the same. 1481 // 1482 1483 if (nDigits <= SINGLE_MAX_DECIMAL_DIGITS) { 1484 // 1485 // possibly an easy case. 1486 // We know that the digits can be represented 1487 // exactly. And if the exponent isn't too outrageous, 1488 // the whole thing can be done with one operation, 1489 // thus one rounding error. 
1490 // Note that all our constructors trim all leading and 1491 // trailing zeros, so simple values (including zero) 1492 // will always end up here. 1493 // 1494 if (exp == 0 || fValue == 0.0f) { 1495 return (isNegative) ? -fValue : fValue; // small floating integer 1496 } else if (exp >= 0) { 1497 if (exp <= SINGLE_MAX_SMALL_TEN) { 1498 // 1499 // Can get the answer with one operation, 1500 // thus one roundoff. 1501 // 1502 fValue *= SINGLE_SMALL_10_POW[exp]; 1503 return (isNegative) ? -fValue : fValue; 1504 } 1505 int slop = SINGLE_MAX_DECIMAL_DIGITS - kDigits; 1506 if (exp <= SINGLE_MAX_SMALL_TEN + slop) { 1507 // 1508 // We can multiply dValue by 10^(slop) 1509 // and it is still "small" and exact. 1510 // Then we can multiply by 10^(exp-slop) 1511 // with one rounding. 1512 // 1513 fValue *= SINGLE_SMALL_10_POW[slop]; 1514 fValue *= SINGLE_SMALL_10_POW[exp - slop]; 1515 return (isNegative) ? -fValue : fValue; 1516 } 1517 // 1518 // Else we have a hard case with a positive exp. 1519 // 1520 } else { 1521 if (exp >= -SINGLE_MAX_SMALL_TEN) { 1522 // 1523 // Can get the answer in one division. 1524 // 1525 fValue /= SINGLE_SMALL_10_POW[-exp]; 1526 return (isNegative) ? -fValue : fValue; 1527 } 1528 // ``````1538 // First, finish accumulating digits. 1539 // Then convert that integer to a double, multiply 1540 // by the appropriate power of ten, and convert to float. 1541 // 1542 long lValue = (long) iValue; 1543 for (int i = kDigits; i < nDigits; i++) { 1544 lValue = lValue * 10L + (long) ((int) digits[i] - (int) '0'); 1545 } 1546 double dValue = (double) lValue; 1547 exp = decExponent - nDigits; 1548 dValue *= SMALL_10_POW[exp]; 1549 fValue = (float) dValue; 1550 return (isNegative) ? -fValue : fValue; 1551 1552 } 1553 // 1554 // Harder cases: 1555 // The sum of digits plus exponent is greater than 1556 // what we think we can do with one error. 
1557 // 1558 // Start by weeding out obviously out-of-range 1559 // results, then convert to double and go to 1560 // common hard-case code. 1561 // 1562 if (decExponent > SINGLE_MAX_DECIMAL_EXPONENT + 1) { 1563 // 1564 // Lets face it. This is going to be 1565 // Infinity. Cut to the chase. 1566 // 1567 return (isNegative) ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY; 1568 } else if (decExponent < SINGLE_MIN_DECIMAL_EXPONENT - 1) { 1569 // 1570 // Lets face it. This is going to be 1571 // zero. Cut to the chase. 1572 // 1573 return (isNegative) ? -0.0f : 0.0f; 1574 } 1575 1576 // 1577 // Here, we do 'way too much work, but throwing away 1578 // our partial results, and going and doing the whole 1579 // thing as double, then throwing away half the bits that computes 1580 // when we convert back to float. 1581 // 1582 // The alternative is to reproduce the whole multiple-precision 1583 // algorithm for float precision, or to try to parameterize it 1584 // for common usage. The former will take about 400 lines of code, 1585 // and the latter I tried without success. Thus the semi-hack 1586 // answer here. 1587 // 1588 double dValue = doubleValue(true); 1589 return stickyRound(dValue, roundDir); 1590 } 1591 1592 1593 /** 1594 * All the positive powers of 10 that can be 1595 * represented exactly in double/float. 1596 */ 1597 private static final double[] SMALL_10_POW = { 1598 1.0e0, 1599 1.0e1, 1.0e2, 1.0e3, 1.0e4, 1.0e5, 1600 1.0e6, 1.0e7, 1.0e8, 1.0e9, 1.0e10, 1601 1.0e11, 1.0e12, 1.0e13, 1.0e14, 1.0e15, 1602 1.0e16, 1.0e17, 1.0e18, 1.0e19, 1.0e20, 1603 1.0e21, 1.0e22 1604 }; 1605 1606 private static final float[] SINGLE_SMALL_10_POW = { 1607 1.0e0f, 1608 1.0e1f, 1.0e2f, 1.0e3f, 1.0e4f, 1.0e5f, 1609 1.0e6f, 1.0e7f, 1.0e8f, 1.0e9f, 1.0e10f ``````1918 // 1919 // We parsed everything we could. 1920 // If there are leftovers, then this is not good input! 
1921 // 1922 if ( i < len && 1923 ((i != len - 1) || 1924 (in.charAt(i) != 'f' && 1925 in.charAt(i) != 'F' && 1926 in.charAt(i) != 'd' && 1927 in.charAt(i) != 'D'))) { 1928 break parseNumber; // go throw exception 1929 } 1930 if(isZero) { 1931 return isNegative ? A2BC_NEGATIVE_ZERO : A2BC_POSITIVE_ZERO; 1932 } 1933 return new ASCIIToBinaryBuffer(isNegative, decExp, digits, nDigits); 1934 } catch ( StringIndexOutOfBoundsException e ){ /* deliberately ignored: control falls through to the NumberFormatException thrown below */ } 1935 throw new NumberFormatException("For input string: \"" + in + "\""); 1936 } 1937 1938 /** 1939 * Rounds a double to a float. 1940 * In addition to the fraction bits of the double, 1941 * look at the roundDirection argument, 1942 * which should help us avoid double-rounding error. 1943 * Callers pass the roundDir hint recorded while the double was computed, when the estimate was 1944 * close enough, but not exact. It tells us which direction 1945 * of rounding is preferred. * * @param dval the double to narrow to float. * @param roundDirection 0 for a plain cast; otherwise it is added to the raw bit pattern of dval before the cast, unless the exponent field of dval is all zeros or all ones, in which case dval is cast unchanged. * @return the resulting float. 1946 */ 1947 static float stickyRound( double dval, int roundDirection ){ 1948 if(roundDirection!=0) { 1949 long lbits = Double.doubleToRawLongBits( dval ); 1950 long binexp = lbits & DoubleConsts.EXP_BIT_MASK; 1951 if ( binexp == 0L || binexp == DoubleConsts.EXP_BIT_MASK ){ 1952 // what we have here is special. 1953 // don't worry, the right thing will happen. 1954 return (float) dval; 1955 } 1956 lbits += (long)roundDirection; // hack-o-matic. 1957 return (float)Double.longBitsToDouble( lbits ); 1958 } else { 1959 return (float)dval; 1960 } 1961 } 1962 1963 1964 private static class HexFloatPattern { 1965 /** 1966 * Grammar is compatible with hexadecimal floating-point constants 1967 * described in section 6.4.4.2 of the C99 specification. 1968 */ 1969 private static final Pattern VALUE = Pattern.compile( 1970 //1 234 56 7 8 9 1971 "([-+])?0[xX](((\\p{XDigit}+)\\.?)|((\\p{XDigit}*)\\.(\\p{XDigit}+)))[pP]([-+])?(\\p{Digit}+)[fFdD]?" 
1972 ); 1973 } 1974 1975 /** 1976 * Converts string s to a suitable floating decimal; uses the 1977 * double constructor and sets the roundDir variable appropriately 1978 * in case the value is later converted to a float. 1979 * 1980 * @param s The String to parse. 1981 */ 1982 static ASCIIToBinaryConverter parseHexString(String s) { 1983 // Verify string is a member of the hexadecimal floating-point ``````2265 throw new AssertionError("Unexpected shift distance remainder."); 2266 // break; 2267 } 2268 2269 // Round is set; sticky might be set. 2270 2271 // For the sticky bit, it suffices to check the 2272 // current digit and test for any nonzero digits in 2273 // the remaining unprocessed input. 2274 i++; 2275 while (i < signifLength && !sticky) { 2276 currentDigit = getHexDigit(significandString, i); 2277 sticky = sticky || (currentDigit != 0); 2278 i++; 2279 } 2280 2281 } 2282 // else all of string was seen, round and sticky are 2283 // correct as false. 2284 2285 // Check for overflow and update exponent accordingly. 2286 if (exponent > DoubleConsts.MAX_EXPONENT) { // Infinite result 2287 // overflow to properly signed infinity 2288 return isNegative ? A2BC_NEGATIVE_INFINITY : A2BC_POSITIVE_INFINITY; 2289 } else { // Finite return value 2290 if (exponent <= DoubleConsts.MAX_EXPONENT && // (Usually) normal result 2291 exponent >= DoubleConsts.MIN_EXPONENT) { 2292 2293 // The result returned in this block cannot be a 2294 // zero or subnormal; however after the 2295 // significand is adjusted from rounding, we could 2296 // still overflow in infinity. 2297 2298 // AND exponent bits into significand; if the 2299 // significand is incremented and overflows from 2300 // rounding, this combination will update the 2301 // exponent correctly, even in the case of 2302 // Double.MAX_VALUE overflowing to infinity. 2303 2304 significand = ((( exponent + ``````2373 // Number Round(x) 2374 // x0.00 x0. 2375 // x0.01 x0. 2376 // x0.10 x0. 2377 // x0.11 x1. = x0. 
+1 2378 // x1.00 x1. 2379 // x1.01 x1. 2380 // x1.10 x1. + 1 2381 // x1.11 x1. + 1 2382 // 2383 boolean leastZero = ((significand & 1L) == 0L); 2384 if ((leastZero && round && sticky) || 2385 ((!leastZero) && round)) { 2386 significand++; 2387 } 2388 2389 double value = isNegative ? 2390 Double.longBitsToDouble(significand | DoubleConsts.SIGN_BIT_MASK) : 2391 Double.longBitsToDouble(significand ); 2392 2393 int roundDir = 0; 2394 // 2395 // Set roundingDir variable field of fd properly so 2396 // that the input string can be properly rounded to a 2397 // float value. There are two cases to consider: 2398 // 2399 // 1. rounding to double discards sticky bit 2400 // information that would change the result of a float 2401 // rounding (near halfway case between two floats) 2402 // 2403 // 2. rounding to double rounds up when rounding up 2404 // would not occur when rounding to float. 2405 // 2406 // For former case only needs to be considered when 2407 // the bits rounded away when casting to float are all 2408 // zero; otherwise, float round bit is properly set 2409 // and sticky will already be true. 2410 // 2411 // The lower exponent bound for the code below is the 2412 // minimum (normalized) subnormal exponent - 1 since a 2413 // value with that exponent can round up to the 2414 // minimum subnormal value and the sticky bit 2415 // information must be preserved (i.e. case 1). 2416 // 2417 if ((exponent >= FloatConsts.MIN_SUB_EXPONENT - 1) && 2418 (exponent <= FloatConsts.MAX_EXPONENT)) { 2419 // Outside above exponent range, the float value 2420 // will be zero or infinity. 2421 2422 // 2423 // If the low-order 28 bits of a rounded double 2424 // significand are 0, the double could be a 2425 // half-way case for a rounding to float. If the 2426 // double value is a half-way case, the double 2427 // significand may have to be modified to round 2428 // the the right float value (see the stickyRound 2429 // method). 
If the rounding to double has lost 2430 // what would be float sticky bit information, the 2431 // double significand must be incremented. If the 2432 // double value's significand was itself 2433 // incremented, the float value may end up too 2434 // large so the increment should be undone. 2435 // 2436 if ((significand & 0xfffffffL) == 0x0L) { 2437 // For negative values, the sign of the 2438 // roundDir is the same as for positive values 2439 // since adding 1 increasing the significand's 2440 // magnitude and subtracting 1 decreases the 2441 // significand's magnitude. If neither round 2442 // nor sticky is true, the double value is 2443 // exact and no adjustment is required for a 2444 // proper float rounding. 2445 if (round || sticky) { 2446 if (leastZero) { // prerounding lsb is 0 2447 // If round and sticky were both true, 2448 // and the least significant 2449 // significand bit were 0, the rounded 2450 // significand would not have its 2451 // low-order bits be zero. Therefore, 2452 // we only need to adjust the 2453 // significand if round XOR sticky is 2454 // true. 2455 if (round ^ sticky) { 2456 roundDir = 1; 2457 } 2458 } else { // prerounding lsb is 1 2459 // If the prerounding lsb is 1 and the 2460 // resulting significand has its 2461 // low-order bits zero, the significand 2462 // was incremented. Here, we undo the 2463 // increment, which will ensure the 2464 // right guard and sticky bits for the 2465 // float rounding. 2466 if (round) { 2467 roundDir = -1; 2468 } 2469 } 2470 } 2471 } 2472 } 2473 return new PreparedASCIIToBinaryBuffer(value,roundDir); 2474 } 2475 } 2476 } 2477 2478 /** 2479 * Returns s with any leading zeros removed. 2480 */ 2481 static String stripLeadingZeros(String s) { 2482 // return s.replaceFirst("^0+", ""); 2483 if(!s.isEmpty() && s.charAt(0)=='0') { 2484 for(int i=1; iBinaryToASCIIConverter 35 * instance may be obtained and reused. 
36 */ 37 public class FloatingDecimal{ 38 // 39 // Constants of the implementation; 40 // most are IEEE-754 related. 41 // (There are more really boring constants at the end.) 42 // 43 static final int EXP_SHIFT = DoubleConsts.SIGNIFICAND_WIDTH - 1; 44 static final long FRACT_HOB = ( 1L<String. 65 * 66 * @param d The double precision value. 67 * @return The value converted to a String. 68 */ 69 public static String toJavaFormatString(double d) { 70 return getBinaryToASCIIConverter(d).toJavaFormatString(); 71 } 72 73 /** 74 * Converts a single precision floating point value to a String. 75 * 76 * @param f The single precision value. 77 * @return The value converted to a String. 78 */ 79 public static String toJavaFormatString(float f) { `````` 987 } 988 989 /** 990 * A converter which can process an ASCII String representation 991 * of a single or double precision floating point value into a 992 * float or a double. 993 */ 994 interface ASCIIToBinaryConverter { 995 996 double doubleValue(); 997 998 float floatValue(); 999 1000 } 1001 1002 /** 1003 * A ASCIIToBinaryConverter container for a double. 
1004 */ 1005 static class PreparedASCIIToBinaryBuffer implements ASCIIToBinaryConverter { 1006 /** The double result, returned unchanged by doubleValue(). */ final private double doubleVal; 1007 /** The float result, returned unchanged by floatValue(); supplied by the creator rather than derived from doubleVal here. */ final private float floatVal; 1008 1009 /** Stores the two precomputed results as given; no rounding is performed in this class. */ public PreparedASCIIToBinaryBuffer(double doubleVal, float floatVal) { 1010 this.doubleVal = doubleVal; 1011 this.floatVal = floatVal; 1012 } 1013 1014 /** Returns the stored double. */ @Override 1015 public double doubleValue() { 1016 return doubleVal; 1017 } 1018 1019 /** Returns the stored float. */ @Override 1020 public float floatValue() { 1021 return floatVal; 1022 } 1023 } 1024 1025 /* Shared converters for the infinities, NaN and the signed zeros, each pairing a double with the corresponding float constant. */ static final ASCIIToBinaryConverter A2BC_POSITIVE_INFINITY = new PreparedASCIIToBinaryBuffer(Double.POSITIVE_INFINITY, Float.POSITIVE_INFINITY); 1026 static final ASCIIToBinaryConverter A2BC_NEGATIVE_INFINITY = new PreparedASCIIToBinaryBuffer(Double.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY); 1027 static final ASCIIToBinaryConverter A2BC_NOT_A_NUMBER = new PreparedASCIIToBinaryBuffer(Double.NaN, Float.NaN); 1028 static final ASCIIToBinaryConverter A2BC_POSITIVE_ZERO = new PreparedASCIIToBinaryBuffer(0.0d, 0.0f); 1029 static final ASCIIToBinaryConverter A2BC_NEGATIVE_ZERO = new PreparedASCIIToBinaryBuffer(-0.0d, -0.0f); 1030 1031 /** 1032 * A buffered implementation of ASCIIToBinaryConverter. 1033 */ 1034 static class ASCIIToBinaryBuffer implements ASCIIToBinaryConverter { 1035 boolean isNegative; 1036 int decExponent; 1037 char digits[]; 1038 int nDigits; 1039 1040 ASCIIToBinaryBuffer( boolean negSign, int decExponent, char[] digits, int n) 1041 { 1042 this.isNegative = negSign; 1043 this.decExponent = decExponent; 1044 this.digits = digits; 1045 this.nDigits = n; 1046 } 1047 1048 /* 1049 * Takes a FloatingDecimal, which we presumably just scanned in, 1050 * and finds out what its value is, as a double. 1051 * 1052 * NOTE(review): this comment used to say that roundDir is set as a side effect; 1053 * this class declares no roundDir field and the visible body assigns none, 1054 * so that remark looks stale - confirm against the parts of the method not shown here. 
1055 */ 1056 @Override 1057 public double doubleValue() { 1058 int kDigits = Math.min(nDigits, MAX_DECIMAL_DIGITS + 1); 1059 // 1060 // convert the lead kDigits to a long integer. 1061 // 1062 // (special performance hack: start to do it using int) 1063 int iValue = (int) digits[0] - (int) '0'; 1064 int iDigits = Math.min(kDigits, INT_DECIMAL_DIGITS); 1065 for (int i = 1; i < iDigits; i++) { 1066 iValue = iValue * 10 + (int) digits[i] - (int) '0'; 1067 } 1068 long lValue = (long) iValue; 1069 for (int i = iDigits; i < kDigits; i++) { 1070 lValue = lValue * 10L + (long) ((int) digits[i] - (int) '0'); 1071 } 1072 double dValue = (double) lValue; 1073 int exp = decExponent - kDigits; 1074 // 1075 // lValue now contains a long integer with the value of 1076 // the first kDigits digits of the number. 1077 // dValue contains the (double) of the same. 1078 // 1079 1080 if (nDigits <= MAX_DECIMAL_DIGITS) { 1081 // 1082 // possibly an easy case. 1083 // We know that the digits can be represented 1084 // exactly. And if the exponent isn't too outrageous, 1085 // the whole thing can be done with one operation, 1086 // thus one rounding error. 1087 // Note that all our constructors trim all leading and 1088 // trailing zeros, so simple values (including zero) 1089 // will always end up here 1090 // 1091 if (exp == 0 || dValue == 0.0) { 1092 return (isNegative) ? -dValue : dValue; // small floating integer 1093 } 1094 else if (exp >= 0) { 1095 if (exp <= MAX_SMALL_TEN) { 1096 // 1097 // Can get the answer with one operation, 1098 // thus one roundoff. 1099 // 1100 double rValue = dValue * SMALL_10_POW[exp]; 1101 return (isNegative) ? -rValue : rValue; 1102 } 1103 int slop = MAX_DECIMAL_DIGITS - kDigits; 1104 if (exp <= MAX_SMALL_TEN + slop) { 1105 // 1106 // We can multiply dValue by 10^(slop) 1107 // and it is still "small" and exact. 1108 // Then we can multiply by 10^(exp-slop) 1109 // with one rounding. 
1110 // 1111 dValue *= SMALL_10_POW[slop]; 1112 double rValue = dValue * SMALL_10_POW[exp - slop]; 1113 return (isNegative) ? -rValue : rValue; 1114 } 1115 // 1116 // Else we have a hard case with a positive exp. 1117 // 1118 } else { 1119 if (exp >= -MAX_SMALL_TEN) { 1120 // 1121 // Can get the answer in one division. 1122 // 1123 double rValue = dValue / SMALL_10_POW[-exp]; 1124 return (isNegative) ? -rValue : rValue; 1125 } 1126 // 1127 // Else we have a hard case with a negative exp. 1128 // 1129 } 1130 } 1131 1132 // 1133 // Harder cases: 1134 // The sum of digits plus exponent is greater than 1135 // what we think we can do with one error. 1136 // 1137 // Start by approximating the right answer by, 1138 // naively, scaling by powers of 10. 1139 // 1140 if (exp > 0) { 1141 if (decExponent > MAX_DECIMAL_EXPONENT + 1) { 1142 // 1143 // Lets face it. This is going to be ``````1224 // Double.MIN_VALUE ). 1225 // 1226 t = dValue * 2.0; 1227 t *= TINY_10_POW[j]; 1228 if (t == 0.0) { 1229 return (isNegative) ? -0.0 : 0.0; 1230 } 1231 t = Double.MIN_VALUE; 1232 } 1233 dValue = t; 1234 } 1235 } 1236 1237 // 1238 // dValue is now approximately the result. 1239 // The hard part is adjusting it, by comparison 1240 // with FDBigInteger arithmetic. 
1241 // Formulate the EXACT big-number result as 1242 // bigD0 * 10^exp 1243 // 1244 if (nDigits > MAX_NDIGITS) { 1245 nDigits = MAX_NDIGITS + 1; 1246 digits[MAX_NDIGITS] = '1'; 1247 } 1248 FDBigInteger bigD0 = new FDBigInteger(lValue, digits, kDigits, nDigits); 1249 exp = decExponent - nDigits; 1250 1251 long ieeeBits = Double.doubleToRawLongBits(dValue); // IEEE-754 bits of double candidate 1252 final int B5 = Math.max(0, -exp); // powers of 5 in bigB, value is not modified inside correctionLoop 1253 final int D5 = Math.max(0, exp); // powers of 5 in bigD, value is not modified inside correctionLoop 1254 bigD0 = bigD0.multByPow52(D5, 0); 1255 bigD0.makeImmutable(); // prevent bigD0 modification inside correctionLoop 1256 FDBigInteger bigD = null; 1257 int prevD2 = 0; 1258 1259 correctionLoop: 1260 while (true) { 1261 // here ieeeBits can't be NaN, Infinity or zero 1262 int binexp = (int) (ieeeBits >>> EXP_SHIFT); 1263 long bigBbits = ieeeBits & DoubleConsts.SIGNIF_BIT_MASK; 1264 if (binexp > 0) { 1265 bigBbits |= FRACT_HOB; 1266 } else { // Normalize denormalized numbers. 1267 assert bigBbits != 0L : bigBbits; // doubleToBigInt(0.0) 1268 int leadingZeros = Long.numberOfLeadingZeros(bigBbits); 1269 int shift = leadingZeros - (63 - EXP_SHIFT); 1270 bigBbits <<= shift; 1271 binexp = 1 - shift; 1272 } 1273 binexp -= DoubleConsts.EXP_BIAS; 1274 int lowOrderZeros = Long.numberOfTrailingZeros(bigBbits); 1275 bigBbits >>>= lowOrderZeros; 1276 final int bigIntExp = binexp - EXP_SHIFT + lowOrderZeros; 1277 final int bigIntNBits = EXP_SHIFT + 1 - lowOrderZeros; 1278 1279 // 1280 // Scale bigD, bigB appropriately for 1281 // big-integer operations. 1282 // Naively, we multiply by powers of ten 1283 // and powers of two. What we actually do 1284 // is keep track of the powers of 5 and 1285 // powers of 2 we would use, then factor out 1286 // common divisors before doing the work. 
1287 // 1288 int B2 = B5; // powers of 2 in bigB 1289 int D2 = D5; // powers of 2 in bigD 1290 int Ulp2; // powers of 2 in halfUlp. 1291 if (bigIntExp >= 0) { 1292 B2 += bigIntExp; 1293 } else { 1294 D2 -= bigIntExp; 1295 } 1296 Ulp2 = B2; 1297 // shift bigB and bigD left by a number s. t. 1298 // halfUlp is still an integer. 1299 int hulpbias; 1300 if (binexp <= -DoubleConsts.EXP_BIAS) { 1301 // This is going to be a denormalized number 1302 // (if not actually zero). 1303 // half an ULP is at 2^-(DoubleConsts.EXP_BIAS+EXP_SHIFT+1) 1304 hulpbias = binexp + lowOrderZeros + DoubleConsts.EXP_BIAS; 1305 } else { 1306 hulpbias = 1 + lowOrderZeros; 1307 } 1308 B2 += hulpbias; 1309 D2 += hulpbias; 1310 // if there are common factors of 2, we might just as well 1311 // factor them out, as they add nothing useful. 1312 int common2 = Math.min(B2, Math.min(D2, Ulp2)); 1313 B2 -= common2; 1314 D2 -= common2; 1315 Ulp2 -= common2; 1316 // do multiplications by powers of 5 and 2 1317 FDBigInteger bigB = FDBigInteger.valueOfMulPow52(bigBbits, B5, B2); 1318 if (bigD == null || prevD2 != D2) { 1319 bigD = bigD0.leftShift(D2); 1320 prevD2 = D2; 1321 } 1322 // 1323 // to recap: ``````1347 Ulp2 -= 1; 1348 if (Ulp2 < 0) { 1349 // rats. Cannot de-scale ulp this far. 1350 // must scale diff in other direction. 1351 Ulp2 = 0; 1352 diff = diff.leftShift(1); 1353 } 1354 } 1355 } else if (cmpResult < 0) { 1356 overvalue = false; // our candidate is too small. 1357 diff = bigD.rightInplaceSub(bigB); // bigB is not user further - reuse 1358 } else { 1359 // the candidate is exactly right! 1360 // this happens with surprising frequency 1361 break correctionLoop; 1362 } 1363 cmpResult = diff.cmpPow52(B5, Ulp2); 1364 if ((cmpResult) < 0) { 1365 // difference is small. 
1366 // this is close enough 1367 break correctionLoop; 1368 } else if (cmpResult == 0) { 1369 // difference is exactly half an ULP 1370 // round to some other value maybe, then finish 1371 if ((ieeeBits & 1) != 0) { // half ties to even 1372 ieeeBits += overvalue ? -1 : 1; // nextDown or nextUp 1373 } 1374 break correctionLoop; 1375 } else { 1376 // difference is non-trivial. 1377 // could scale addend by ratio of difference to 1378 // halfUlp here, if we bothered to compute that difference. 1379 // Most of the time ( I hope ) it is about 1 anyway. 1380 ieeeBits += overvalue ? -1 : 1; // nextDown or nextUp 1381 if (ieeeBits == 0 || ieeeBits == DoubleConsts.EXP_BIT_MASK) { // 0.0 or Double.POSITIVE_INFINITY 1382 break correctionLoop; // oops. Fell off end of range. 1383 } 1384 continue; // try again. 1385 } 1386 1387 } 1388 if (isNegative) { 1389 ieeeBits |= DoubleConsts.SIGN_BIT_MASK; 1390 } 1391 return Double.longBitsToDouble(ieeeBits); 1392 } 1393 1394 /** 1395 * Takes a FloatingDecimal, which we presumably just scanned in, 1396 * and finds out what its value is, as a float. 1397 * This is distinct from doubleValue() to avoid the extremely 1398 * unlikely case of a double rounding error, wherein the conversion 1399 * to double has one rounding error, and the conversion of that double 1400 * to a float has another rounding error, IN THE WRONG DIRECTION, 1401 * ( because of the preference to a zero low-order bit ). 1402 */ 1403 @Override 1404 public float floatValue() { 1405 int kDigits = Math.min(nDigits, SINGLE_MAX_DECIMAL_DIGITS + 1); 1406 // 1407 // convert the lead kDigits to an integer. 1408 // 1409 int iValue = (int) digits[0] - (int) '0'; 1410 for (int i = 1; i < kDigits; i++) { 1411 iValue = iValue * 10 + (int) digits[i] - (int) '0'; 1412 } 1413 float fValue = (float) iValue; 1414 int exp = decExponent - kDigits; 1415 // 1416 // iValue now contains an integer with the value of 1417 // the first kDigits digits of the number. 
1418 // fValue contains the (float) of the same. 1419 // 1420 1421 if (nDigits <= SINGLE_MAX_DECIMAL_DIGITS) { 1422 // 1423 // possibly an easy case. 1424 // We know that the digits can be represented 1425 // exactly. And if the exponent isn't too outrageous, 1426 // the whole thing can be done with one operation, 1427 // thus one rounding error. 1428 // Note that all our constructors trim all leading and 1429 // trailing zeros, so simple values (including zero) 1430 // will always end up here. 1431 // 1432 if (exp == 0 || fValue == 0.0f) { 1433 return (isNegative) ? -fValue : fValue; // small floating integer 1434 } else if (exp >= 0) { 1435 if (exp <= SINGLE_MAX_SMALL_TEN) { 1436 // 1437 // Can get the answer with one operation, 1438 // thus one roundoff. 1439 // 1440 fValue *= SINGLE_SMALL_10_POW[exp]; 1441 return (isNegative) ? -fValue : fValue; 1442 } 1443 int slop = SINGLE_MAX_DECIMAL_DIGITS - kDigits; 1444 if (exp <= SINGLE_MAX_SMALL_TEN + slop) { 1445 // 1446 // We can multiply fValue by 10^(slop) 1447 // and it is still "small" and exact. 1448 // Then we can multiply by 10^(exp-slop) 1449 // with one rounding. 1450 // 1451 fValue *= SINGLE_SMALL_10_POW[slop]; 1452 fValue *= SINGLE_SMALL_10_POW[exp - slop]; 1453 return (isNegative) ? -fValue : fValue; 1454 } 1455 // 1456 // Else we have a hard case with a positive exp. 1457 // 1458 } else { 1459 if (exp >= -SINGLE_MAX_SMALL_TEN) { 1460 // 1461 // Can get the answer in one division. 1462 // 1463 fValue /= SINGLE_SMALL_10_POW[-exp]; 1464 return (isNegative) ? -fValue : fValue; 1465 } 1466 // ``````1476 // First, finish accumulating digits. 1477 // Then convert that integer to a double, multiply 1478 // by the appropriate power of ten, and convert to float. 
1479 // 1480 long lValue = (long) iValue; 1481 for (int i = kDigits; i < nDigits; i++) { 1482 lValue = lValue * 10L + (long) ((int) digits[i] - (int) '0'); 1483 } 1484 double dValue = (double) lValue; 1485 exp = decExponent - nDigits; 1486 dValue *= SMALL_10_POW[exp]; 1487 fValue = (float) dValue; 1488 return (isNegative) ? -fValue : fValue; 1489 1490 } 1491 // 1492 // Harder cases: 1493 // The sum of digits plus exponent is greater than 1494 // what we think we can do with one error. 1495 // 1496 // Start by approximating the right answer by, 1497 // naively, scaling by powers of 10. 1498 // Scaling uses doubles to avoid overflow/underflow. 1499 // 1500 double dValue = fValue; 1501 if (exp > 0) { 1502 if (decExponent > SINGLE_MAX_DECIMAL_EXPONENT + 1) { 1503 // 1504 // Lets face it. This is going to be 1505 // Infinity. Cut to the chase. 1506 // 1507 return (isNegative) ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY; 1508 } 1509 if ((exp & 15) != 0) { 1510 dValue *= SMALL_10_POW[exp & 15]; 1511 } 1512 if ((exp >>= 4) != 0) { 1513 int j; 1514 for (j = 0; exp > 0; j++, exp >>= 1) { 1515 if ((exp & 1) != 0) { 1516 dValue *= BIG_10_POW[j]; 1517 } 1518 } 1519 } 1520 } else if (exp < 0) { 1521 exp = -exp; 1522 if (decExponent < SINGLE_MIN_DECIMAL_EXPONENT - 1) { 1523 // 1524 // Lets face it. This is going to be 1525 // zero. Cut to the chase. 1526 // 1527 return (isNegative) ? -0.0f : 0.0f; 1528 } 1529 if ((exp & 15) != 0) { 1530 dValue /= SMALL_10_POW[exp & 15]; 1531 } 1532 if ((exp >>= 4) != 0) { 1533 int j; 1534 for (j = 0; exp > 0; j++, exp >>= 1) { 1535 if ((exp & 1) != 0) { 1536 dValue *= TINY_10_POW[j]; 1537 } 1538 } 1539 } 1540 } 1541 fValue = Math.max(Float.MIN_VALUE, Math.min(Float.MAX_VALUE, (float) dValue)); 1542 1543 // 1544 // fValue is now approximately the result. 1545 // The hard part is adjusting it, by comparison 1546 // with FDBigInteger arithmetic. 
1547 // Formulate the EXACT big-number result as 1548 // bigD0 * 10^exp 1549 // 1550 if (nDigits > SINGLE_MAX_NDIGITS) { 1551 nDigits = SINGLE_MAX_NDIGITS + 1; 1552 digits[SINGLE_MAX_NDIGITS] = '1'; 1553 } 1554 FDBigInteger bigD0 = new FDBigInteger(iValue, digits, kDigits, nDigits); 1555 exp = decExponent - nDigits; 1556 1557 int ieeeBits = Float.floatToRawIntBits(fValue); // IEEE-754 bits of float candidate 1558 final int B5 = Math.max(0, -exp); // powers of 5 in bigB, value is not modified inside correctionLoop 1559 final int D5 = Math.max(0, exp); // powers of 5 in bigD, value is not modified inside correctionLoop 1560 bigD0 = bigD0.multByPow52(D5, 0); 1561 bigD0.makeImmutable(); // prevent bigD0 modification inside correctionLoop 1562 FDBigInteger bigD = null; 1563 int prevD2 = 0; 1564 1565 correctionLoop: 1566 while (true) { 1567 // here ieeeBits can't be NaN, Infinity or zero 1568 int binexp = ieeeBits >>> SINGLE_EXP_SHIFT; 1569 int bigBbits = ieeeBits & FloatConsts.SIGNIF_BIT_MASK; 1570 if (binexp > 0) { 1571 bigBbits |= SINGLE_FRACT_HOB; 1572 } else { // Normalize denormalized numbers. 1573 assert bigBbits != 0 : bigBbits; // floatToBigInt(0.0) 1574 int leadingZeros = Integer.numberOfLeadingZeros(bigBbits); 1575 int shift = leadingZeros - (31 - SINGLE_EXP_SHIFT); 1576 bigBbits <<= shift; 1577 binexp = 1 - shift; 1578 } 1579 binexp -= FloatConsts.EXP_BIAS; 1580 int lowOrderZeros = Integer.numberOfTrailingZeros(bigBbits); 1581 bigBbits >>>= lowOrderZeros; 1582 final int bigIntExp = binexp - SINGLE_EXP_SHIFT + lowOrderZeros; 1583 final int bigIntNBits = SINGLE_EXP_SHIFT + 1 - lowOrderZeros; 1584 1585 // 1586 // Scale bigD, bigB appropriately for 1587 // big-integer operations. 1588 // Naively, we multiply by powers of ten 1589 // and powers of two. What we actually do 1590 // is keep track of the powers of 5 and 1591 // powers of 2 we would use, then factor out 1592 // common divisors before doing the work. 
1593 // 1594 int B2 = B5; // powers of 2 in bigB 1595 int D2 = D5; // powers of 2 in bigD 1596 int Ulp2; // powers of 2 in halfUlp. 1597 if (bigIntExp >= 0) { 1598 B2 += bigIntExp; 1599 } else { 1600 D2 -= bigIntExp; 1601 } 1602 Ulp2 = B2; 1603 // shift bigB and bigD left by a number s. t. 1604 // halfUlp is still an integer. 1605 int hulpbias; 1606 if (binexp <= -FloatConsts.EXP_BIAS) { 1607 // This is going to be a denormalized number 1608 // (if not actually zero). 1609 // half an ULP is at 2^-(FloatConsts.EXP_BIAS+SINGLE_EXP_SHIFT+1) 1610 hulpbias = binexp + lowOrderZeros + FloatConsts.EXP_BIAS; 1611 } else { 1612 hulpbias = 1 + lowOrderZeros; 1613 } 1614 B2 += hulpbias; 1615 D2 += hulpbias; 1616 // if there are common factors of 2, we might just as well 1617 // factor them out, as they add nothing useful. 1618 int common2 = Math.min(B2, Math.min(D2, Ulp2)); 1619 B2 -= common2; 1620 D2 -= common2; 1621 Ulp2 -= common2; 1622 // do multiplications by powers of 5 and 2 1623 FDBigInteger bigB = FDBigInteger.valueOfMulPow52(bigBbits, B5, B2); 1624 if (bigD == null || prevD2 != D2) { 1625 bigD = bigD0.leftShift(D2); 1626 prevD2 = D2; 1627 } 1628 // 1629 // to recap: 1630 // bigB is the scaled-big-int version of our floating-point 1631 // candidate. 1632 // bigD is the scaled-big-int version of the exact value 1633 // as we understand it. 1634 // halfUlp is 1/2 an ulp of bigB, except for special cases 1635 // of exact powers of 2 1636 // 1637 // the plan is to compare bigB with bigD, and if the difference 1638 // is less than halfUlp, then we're satisfied. Otherwise, 1639 // use the ratio of difference to halfUlp to calculate a fudge 1640 // factor to add to the floating value, then go 'round again. 1641 // 1642 FDBigInteger diff; 1643 int cmpResult; 1644 boolean overvalue; 1645 if ((cmpResult = bigB.cmp(bigD)) > 0) { 1646 overvalue = true; // our candidate is too big. 
1647 diff = bigB.leftInplaceSub(bigD); // bigB is not user further - reuse 1648 if ((bigIntNBits == 1) && (bigIntExp > -FloatConsts.EXP_BIAS + 1)) { 1649 // candidate is a normalized exact power of 2 and 1650 // is too big (larger than Float.MIN_NORMAL). We will be subtracting. 1651 // For our purposes, ulp is the ulp of the 1652 // next smaller range. 1653 Ulp2 -= 1; 1654 if (Ulp2 < 0) { 1655 // rats. Cannot de-scale ulp this far. 1656 // must scale diff in other direction. 1657 Ulp2 = 0; 1658 diff = diff.leftShift(1); 1659 } 1660 } 1661 } else if (cmpResult < 0) { 1662 overvalue = false; // our candidate is too small. 1663 diff = bigD.rightInplaceSub(bigB); // bigB is not user further - reuse 1664 } else { 1665 // the candidate is exactly right! 1666 // this happens with surprising frequency 1667 break correctionLoop; 1668 } 1669 cmpResult = diff.cmpPow52(B5, Ulp2); 1670 if ((cmpResult) < 0) { 1671 // difference is small. 1672 // this is close enough 1673 break correctionLoop; 1674 } else if (cmpResult == 0) { 1675 // difference is exactly half an ULP 1676 // round to some other value maybe, then finish 1677 if ((ieeeBits & 1) != 0) { // half ties to even 1678 ieeeBits += overvalue ? -1 : 1; // nextDown or nextUp 1679 } 1680 break correctionLoop; 1681 } else { 1682 // difference is non-trivial. 1683 // could scale addend by ratio of difference to 1684 // halfUlp here, if we bothered to compute that difference. 1685 // Most of the time ( I hope ) it is about 1 anyway. 1686 ieeeBits += overvalue ? -1 : 1; // nextDown or nextUp 1687 if (ieeeBits == 0 || ieeeBits == FloatConsts.EXP_BIT_MASK) { // 0.0 or Float.POSITIVE_INFINITY 1688 break correctionLoop; // oops. Fell off end of range. 1689 } 1690 continue; // try again. 
1691 } 1692 1693 } 1694 if (isNegative) { 1695 ieeeBits |= FloatConsts.SIGN_BIT_MASK; 1696 } 1697 return Float.intBitsToFloat(ieeeBits); 1698 } 1699 1700 1701 /** 1702 * All the positive powers of 10 that can be 1703 * represented exactly in double/float. 1704 */ 1705 private static final double[] SMALL_10_POW = { 1706 1.0e0, 1707 1.0e1, 1.0e2, 1.0e3, 1.0e4, 1.0e5, 1708 1.0e6, 1.0e7, 1.0e8, 1.0e9, 1.0e10, 1709 1.0e11, 1.0e12, 1.0e13, 1.0e14, 1.0e15, 1710 1.0e16, 1.0e17, 1.0e18, 1.0e19, 1.0e20, 1711 1.0e21, 1.0e22 1712 }; 1713 1714 private static final float[] SINGLE_SMALL_10_POW = { 1715 1.0e0f, 1716 1.0e1f, 1.0e2f, 1.0e3f, 1.0e4f, 1.0e5f, 1717 1.0e6f, 1.0e7f, 1.0e8f, 1.0e9f, 1.0e10f ``````2026 // 2027 // We parsed everything we could. 2028 // If there are leftovers, then this is not good input! 2029 // 2030 if ( i < len && 2031 ((i != len - 1) || 2032 (in.charAt(i) != 'f' && 2033 in.charAt(i) != 'F' && 2034 in.charAt(i) != 'd' && 2035 in.charAt(i) != 'D'))) { 2036 break parseNumber; // go throw exception 2037 } 2038 if(isZero) { 2039 return isNegative ? A2BC_NEGATIVE_ZERO : A2BC_POSITIVE_ZERO; 2040 } 2041 return new ASCIIToBinaryBuffer(isNegative, decExp, digits, nDigits); 2042 } catch ( StringIndexOutOfBoundsException e ){ } 2043 throw new NumberFormatException("For input string: \"" + in + "\""); 2044 } 2045 2046 private static class HexFloatPattern { 2047 /** 2048 * Grammar is compatible with hexadecimal floating-point constants 2049 * described in section 6.4.4.2 of the C99 specification. 2050 */ 2051 private static final Pattern VALUE = Pattern.compile( 2052 //1 234 56 7 8 9 2053 "([-+])?0[xX](((\\p{XDigit}+)\\.?)|((\\p{XDigit}*)\\.(\\p{XDigit}+)))[pP]([-+])?(\\p{Digit}+)[fFdD]?" 2054 ); 2055 } 2056 2057 /** 2058 * Converts string s to a suitable floating decimal; uses the 2059 * double constructor and sets the roundDir variable appropriately 2060 * in case the value is later converted to a float. 2061 * 2062 * @param s The String to parse. 
2063 */ 2064 static ASCIIToBinaryConverter parseHexString(String s) { 2065 // Verify string is a member of the hexadecimal floating-point ``````2347 throw new AssertionError("Unexpected shift distance remainder."); 2348 // break; 2349 } 2350 2351 // Round is set; sticky might be set. 2352 2353 // For the sticky bit, it suffices to check the 2354 // current digit and test for any nonzero digits in 2355 // the remaining unprocessed input. 2356 i++; 2357 while (i < signifLength && !sticky) { 2358 currentDigit = getHexDigit(significandString, i); 2359 sticky = sticky || (currentDigit != 0); 2360 i++; 2361 } 2362 2363 } 2364 // else all of string was seen, round and sticky are 2365 // correct as false. 2366 2367 // Float calculations 2368 int floatBits = isNegative ? FloatConsts.SIGN_BIT_MASK : 0; 2369 if (exponent >= FloatConsts.MIN_EXPONENT) { 2370 if (exponent > FloatConsts.MAX_EXPONENT) { 2371 // Float.POSITIVE_INFINITY 2372 floatBits |= FloatConsts.EXP_BIT_MASK; 2373 } else { 2374 int threshShift = DoubleConsts.SIGNIFICAND_WIDTH - FloatConsts.SIGNIFICAND_WIDTH - 1; 2375 boolean floatSticky = (significand & ((1L << threshShift) - 1)) != 0 || round || sticky; 2376 int iValue = (int) (significand >>> threshShift); 2377 if ((iValue & 3) != 1 || floatSticky) { 2378 iValue++; 2379 } 2380 floatBits |= (((((int) exponent) + (FloatConsts.EXP_BIAS - 1))) << SINGLE_EXP_SHIFT) + (iValue >> 1); 2381 } 2382 } else { 2383 if (exponent < FloatConsts.MIN_SUB_EXPONENT - 1) { 2384 // 0 2385 } else { 2386 // exponent == -127 ==> threshShift = 53 - 2 + (-149) - (-127) = 53 - 24 2387 int threshShift = (int) ((DoubleConsts.SIGNIFICAND_WIDTH - 2 + FloatConsts.MIN_SUB_EXPONENT) - exponent); 2388 assert threshShift >= DoubleConsts.SIGNIFICAND_WIDTH - FloatConsts.SIGNIFICAND_WIDTH; 2389 assert threshShift < DoubleConsts.SIGNIFICAND_WIDTH; 2390 boolean floatSticky = (significand & ((1L << threshShift) - 1)) != 0 || round || sticky; 2391 int iValue = (int) (significand >>> threshShift); 2392 
if ((iValue & 3) != 1 || floatSticky) { 2393 iValue++; 2394 } 2395 floatBits |= iValue >> 1; 2396 } 2397 } 2398 float fValue = Float.intBitsToFloat(floatBits); 2399 2400 // Check for overflow and update exponent accordingly. 2401 if (exponent > DoubleConsts.MAX_EXPONENT) { // Infinite result 2402 // overflow to properly signed infinity 2403 return isNegative ? A2BC_NEGATIVE_INFINITY : A2BC_POSITIVE_INFINITY; 2404 } else { // Finite return value 2405 if (exponent <= DoubleConsts.MAX_EXPONENT && // (Usually) normal result 2406 exponent >= DoubleConsts.MIN_EXPONENT) { 2407 2408 // The result returned in this block cannot be a 2409 // zero or subnormal; however after the 2410 // significand is adjusted from rounding, we could 2411 // still overflow in infinity. 2412 2413 // AND exponent bits into significand; if the 2414 // significand is incremented and overflows from 2415 // rounding, this combination will update the 2416 // exponent correctly, even in the case of 2417 // Double.MAX_VALUE overflowing to infinity. 2418 2419 significand = ((( exponent + ``````2488 // Number Round(x) 2489 // x0.00 x0. 2490 // x0.01 x0. 2491 // x0.10 x0. 2492 // x0.11 x1. = x0. +1 2493 // x1.00 x1. 2494 // x1.01 x1. 2495 // x1.10 x1. + 1 2496 // x1.11 x1. + 1 2497 // 2498 boolean leastZero = ((significand & 1L) == 0L); 2499 if ((leastZero && round && sticky) || 2500 ((!leastZero) && round)) { 2501 significand++; 2502 } 2503 2504 double value = isNegative ? 2505 Double.longBitsToDouble(significand | DoubleConsts.SIGN_BIT_MASK) : 2506 Double.longBitsToDouble(significand ); 2507 2508 return new PreparedASCIIToBinaryBuffer(value, fValue); 2509 } 2510 } 2511 } 2512 2513 /** 2514 * Returns s with any leading zeros removed. 2515 */ 2516 static String stripLeadingZeros(String s) { 2517 // return s.replaceFirst("^0+", ""); 2518 if(!s.isEmpty() && s.charAt(0)=='0') { 2519 for(int i=1; i