1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.parser;
  27 
  28 import static java.lang.Character.DECIMAL_DIGIT_NUMBER;
  29 import static java.lang.Character.LOWERCASE_LETTER;
  30 import static java.lang.Character.OTHER_PUNCTUATION;
  31 import static java.lang.Character.SPACE_SEPARATOR;
  32 import static java.lang.Character.UPPERCASE_LETTER;
  33 
  34 import java.util.HashMap;
  35 import java.util.Locale;
  36 
  37 /**
  38  * JavaScript date parser. This class first tries to parse a date string
  39  * according to the extended ISO 8601 format specified in ES5 15.9.1.15.
  40  * If that fails, it falls back to legacy mode in which it accepts a range
  41  * of different formats.
  42  *
  43  * <p>This class is neither thread-safe nor reusable. Calling the
  44  * <code>parse()</code> method more than once will yield undefined results.</p>
  45  */
  46 public class DateParser {
  47 
  48     /** Constant for index position of parsed year value. */
  49     public final static int YEAR        = 0;
  50     /** Constant for index position of parsed month value. */
  51     public final static int MONTH       = 1;
  52     /** Constant for index position of parsed day value. */
  53     public final static int DAY         = 2;
  54     /** Constant for index position of parsed hour value. */
  55     public final static int HOUR        = 3;
  56     /** Constant for index position of parsed minute value. */
  57     public final static int MINUTE      = 4;
  58     /** Constant for index position of parsed second value. */
  59     public final static int SECOND      = 5;
  60     /** Constant for index position of parsed millisecond value. */
  61     public final static int MILLISECOND = 6;
  62     /** Constant for index position of parsed time zone offset value. */
  63     public final static int TIMEZONE    = 7;
  64 
  65     private enum Token {
  66         UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END
  67     }
  68 
  69     private final String string;
  70     private final int length;
  71     private final Integer[] fields;
  72     private int pos = 0;
  73     private Token token;
  74     private int tokenLength;
  75     private Name nameValue;
  76     private int numValue;
  77     private int currentField = YEAR;
  78     private int yearSign = 0;
  79     private boolean namedMonth = false;
  80 
  81     private final static HashMap<String,Name> names = new HashMap<>();
  82 
  83     static {
  84         addName("monday", Name.DAY_OF_WEEK, 0);
  85         addName("tuesday", Name.DAY_OF_WEEK, 0);
  86         addName("wednesday", Name.DAY_OF_WEEK, 0);
  87         addName("thursday", Name.DAY_OF_WEEK, 0);
  88         addName("friday", Name.DAY_OF_WEEK, 0);
  89         addName("saturday", Name.DAY_OF_WEEK, 0);
  90         addName("sunday", Name.DAY_OF_WEEK, 0);
  91         addName("january", Name.MONTH_NAME, 1);
  92         addName("february", Name.MONTH_NAME, 2);
  93         addName("march", Name.MONTH_NAME, 3);
  94         addName("april", Name.MONTH_NAME, 4);
  95         addName("may", Name.MONTH_NAME, 5);
  96         addName("june", Name.MONTH_NAME, 6);
  97         addName("july", Name.MONTH_NAME, 7);
  98         addName("august", Name.MONTH_NAME, 8);
  99         addName("september", Name.MONTH_NAME, 9);
 100         addName("october", Name.MONTH_NAME, 10);
 101         addName("november", Name.MONTH_NAME, 11);
 102         addName("december", Name.MONTH_NAME, 12);
 103         addName("am", Name.AM_PM, 0);
 104         addName("pm", Name.AM_PM, 12);
 105         addName("z", Name.TIMEZONE_ID, 0);
 106         addName("gmt", Name.TIMEZONE_ID, 0);
 107         addName("ut", Name.TIMEZONE_ID, 0);
 108         addName("utc", Name.TIMEZONE_ID, 0);
 109         addName("est", Name.TIMEZONE_ID, -5 * 60);
 110         addName("edt", Name.TIMEZONE_ID, -4 * 60);
 111         addName("cst", Name.TIMEZONE_ID, -6 * 60);
 112         addName("cdt", Name.TIMEZONE_ID, -5 * 60);
 113         addName("mst", Name.TIMEZONE_ID, -7 * 60);
 114         addName("mdt", Name.TIMEZONE_ID, -6 * 60);
 115         addName("pst", Name.TIMEZONE_ID, -8 * 60);
 116         addName("pdt", Name.TIMEZONE_ID, -7 * 60);
 117         addName("t", Name.TIME_SEPARATOR, 0);
 118     }
 119 
 120     /**
 121      * Construct a new <code>DateParser</code> instance for parsing the given string.
 122      * @param string the string to be parsed
 123      */
 124     public DateParser(final String string) {
 125         this.string = string;
 126         this.length = string.length();
 127         this.fields = new Integer[TIMEZONE + 1];
 128     }
 129 
 130     /**
 131      * Try parsing the given string as date according to the extended ISO 8601 format
 132      * specified in ES5 15.9.1.15. Fall back to legacy mode if that fails.
 133      * This method returns <code>true</code> if the string could be parsed.
 134      * @return true if the string could be parsed as date
 135      */
 136     public boolean parse() {
 137         return parseEcmaDate() || parseLegacyDate();
 138     }
 139 
 140     /**
 141      * Try parsing the date string according to the rules laid out in ES5 15.9.1.15.
 142      * The date string must conform to the following format:
 143      *
 144      * <pre>  [('-'|'+')yy]yyyy[-MM[-dd]][Thh:mm[:ss[.sss]][Z|(+|-)hh:mm]] </pre>
 145      *
 146      * <p>If the string does not contain a time zone offset, the <code>TIMEZONE</code> field
 147      * is set to <code>0</code> (GMT).</p>
 148      * @return true if string represents a valid ES5 date string.
 149      */
 150     public boolean parseEcmaDate() {
 151 
 152         if (token == null) {
 153             token = next();
 154         }
 155 
 156         while (token != Token.END) {
 157 
 158             switch (token) {
 159                 case NUMBER:
 160                     if (currentField == YEAR && yearSign != 0) {
 161                         // 15.9.1.15.1 Extended year must have six digits
 162                         if (tokenLength != 6) {
 163                             return false;
 164                         }
 165                         numValue *= yearSign;
 166                     } else if (!checkEcmaField(currentField, numValue)) {
 167                         return false;
 168                     }
 169                     if (!skipEcmaDelimiter()) {
 170                         return false;
 171                     }
 172                     if (currentField < TIMEZONE) {
 173                         set(currentField++, numValue);
 174                     }
 175                     break;
 176 
 177                 case NAME:
 178                     if (nameValue == null) {
 179                         return false;
 180                     }
 181                     switch (nameValue.type) {
 182                         case Name.TIME_SEPARATOR:
 183                             if (currentField == YEAR || currentField > HOUR) {
 184                                 return false;
 185                             }
 186                             currentField = HOUR;
 187                             break;
 188                         case Name.TIMEZONE_ID:
 189                             if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) {
 190                                 return false;
 191                             }
 192                             break;
 193                         default:
 194                             return false;
 195                     }
 196                     break;
 197 
 198                 case SIGN:
 199                     if (peek() == -1) {
 200                         // END after sign - wrong!
 201                         return false;
 202                     }
 203 
 204                     if (currentField == YEAR) {
 205                         yearSign = numValue;
 206                     } else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) {
 207                         // Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds
 208                         return false;
 209                     }
 210                     break;
 211 
 212                 default:
 213                     return false;
 214             }
 215             token = next();
 216         }
 217 
 218         return patchResult(true);
 219     }
 220 
 221     /**
 222      * Try parsing the date using a fuzzy algorithm that can handle a variety of formats.
 223      *
 224      * <p>Numbers separated by <code>':'</code> are treated as time values, optionally followed by a
 225      * millisecond value separated by <code>'.'</code>. Other number values are treated as date values.
 226      * The exact sequence of day, month, and year values to apply is determined heuristically.</p>
 227      *
 228      * <p>English month names and selected time zone names as well as AM/PM markers are recognized
 229      * and handled properly. Additionally, numeric time zone offsets such as <code>(+|-)hh:mm</code> or
 230      * <code>(+|-)hhmm</code> are recognized. If the string does not contain a time zone offset
 231      * the <code>TIMEZONE</code>field is left undefined, meaning the local time zone should be applied.</p>
 232      *
 233      * <p>English weekday names are recognized but ignored. All text in parentheses is ignored as well.
 234      * All other text causes parsing to fail.</p>
 235      *
 236      * @return true if the string could be parsed
 237      */
 238     public boolean parseLegacyDate() {
 239 
 240         if (yearSign != 0 || currentField > DAY) {
 241             // we don't support signed years in legacy mode
 242             return false;
 243         }
 244         if (token == null) {
 245             token = next();
 246         }
 247 
 248         while (token != Token.END) {
 249 
 250             switch (token) {
 251                 case NUMBER:
 252                     if (skipDelimiter(':')) {
 253                         // A number followed by ':' is parsed as time
 254                         if (!setTimeField(numValue)) {
 255                             return false;
 256                         }
 257                         // consume remaining time tokens
 258                         do {
 259                             token = next();
 260                             if (token != Token.NUMBER || !setTimeField(numValue)) {
 261                                 return false;
 262                             }
 263                         } while (skipDelimiter(isSet(SECOND) ? '.' : ':'));
 264 
 265                     } else {
 266                         // Parse as date token
 267                         if (!setDateField(numValue)) {
 268                             return false;
 269                         }
 270                         skipDelimiter('-');
 271                     }
 272                     break;
 273 
 274                 case NAME:
 275                     if (nameValue == null) {
 276                         return false;
 277                     }
 278                     switch (nameValue.type) {
 279                         case Name.AM_PM:
 280                             if (!setAmPm(nameValue.value)) {
 281                                 return false;
 282                             }
 283                             break;
 284                         case Name.MONTH_NAME:
 285                             if (!setMonth(nameValue.value)) {
 286                                 return false;
 287                             }
 288                             break;
 289                         case Name.TIMEZONE_ID:
 290                             if (!setTimezone(nameValue.value, false)) {
 291                                 return false;
 292                             }
 293                             break;
 294                         case Name.TIME_SEPARATOR:
 295                             return false;
 296                         default:
 297                             break;
 298                     }
 299                     if (nameValue.type != Name.TIMEZONE_ID) {
 300                         skipDelimiter('-');
 301                     }
 302                     break;
 303 
 304                 case SIGN:
 305                     if (peek() == -1) {
 306                         // END after sign - wrong!
 307                         return false;
 308                     }
 309 
 310                     if (!setTimezone(readTimeZoneOffset(), true)) {
 311                         return false;
 312                     }
 313                     break;
 314 
 315                 case PARENTHESIS:
 316                     if (!skipParentheses()) {
 317                         return false;
 318                     }
 319                     break;
 320 
 321                 case SEPARATOR:
 322                     break;
 323 
 324                 default:
 325                     return false;
 326             }
 327             token = next();
 328         }
 329 
 330         return patchResult(false);
 331     }
 332 
 333     /**
 334      * Get the parsed date and time fields as an array of <code>Integers</code>.
 335      *
 336      * <p>If parsing was successful, all fields are guaranteed to be set except for the
 337      * <code>TIMEZONE</code> field which may be <code>null</code>, meaning that local time zone
 338      * offset should be applied.</p>
 339      *
 340      * @return the parsed date fields
 341      */
 342     public Integer[] getDateFields() {
 343         return fields;
 344     }
 345 
 346     private boolean isSet(final int field) {
 347         return fields[field] != null;
 348     }
 349 
 350     private Integer get(final int field) {
 351         return fields[field];
 352     }
 353 
 354     private void set(final int field, final int value) {
 355         fields[field] = value;
 356     }
 357 
 358     private int peek() {
 359         return pos < length ? string.charAt(pos) : -1;
 360     }
 361 
 362     // Skip delimiter if followed by a number. Used for ISO 8601 formatted dates
 363     private boolean skipNumberDelimiter(final char c) {
 364         if (pos < length - 1 && string.charAt(pos) == c
 365                 && Character.getType(string.charAt(pos + 1)) == DECIMAL_DIGIT_NUMBER) {
 366             token = null;
 367             pos++;
 368             return true;
 369         }
 370         return false;
 371     }
 372 
 373     private boolean skipDelimiter(final char c) {
 374         if (pos < length && string.charAt(pos) == c) {
 375             token = null;
 376             pos++;
 377             return true;
 378         }
 379         return false;
 380     }
 381 
 382     private Token next() {
 383         if (pos >= length) {
 384             tokenLength = 0;
 385             return Token.END;
 386         }
 387 
 388         final char c = string.charAt(pos);
 389 
 390         if (c > 0x80) {
 391             tokenLength = 1;
 392             pos++;
 393             return Token.UNKNOWN; // We only deal with ASCII here
 394         }
 395 
 396         final int type = Character.getType(c);
 397         switch (type) {
 398             case DECIMAL_DIGIT_NUMBER:
 399                 numValue = readNumber(6);
 400                 return Token.NUMBER;
 401             case SPACE_SEPARATOR :
 402             case OTHER_PUNCTUATION:
 403                 tokenLength = 1;
 404                 pos++;
 405                 return Token.SEPARATOR;
 406             case UPPERCASE_LETTER:
 407             case LOWERCASE_LETTER:
 408                 nameValue = readName();
 409                 return Token.NAME;
 410             default:
 411                 tokenLength = 1;
 412                 pos++;
 413                 switch (c) {
 414                     case '(':
 415                         return Token.PARENTHESIS;
 416                     case '-':
 417                     case '+':
 418                         numValue = c == '-' ? -1 : 1;
 419                         return Token.SIGN;
 420                     default:
 421                         return Token.UNKNOWN;
 422                 }
 423         }
 424     }
 425 
 426     private static boolean checkLegacyField(final int field, final int value) {
 427         switch (field) {
 428             case HOUR:
 429                 return isHour(value);
 430             case MINUTE:
 431             case SECOND:
 432                 return isMinuteOrSecond(value);
 433             case MILLISECOND:
 434                 return isMillisecond(value);
 435             default:
 436                 // skip validation on other legacy fields as we don't know what's what
 437                 return true;
 438         }
 439     }
 440 
 441     private boolean checkEcmaField(final int field, final int value) {
 442         switch (field) {
 443             case YEAR:
 444                 return tokenLength == 4;
 445             case MONTH:
 446                 return tokenLength == 2 && isMonth(value);
 447             case DAY:
 448                 return tokenLength == 2 && isDay(value);
 449             case HOUR:
 450                 return tokenLength == 2 && isHour(value);
 451             case MINUTE:
 452             case SECOND:
 453                 return tokenLength == 2 && isMinuteOrSecond(value);
 454             case MILLISECOND:
 455                 // we allow millisecond to be less than 3 digits
 456                 return tokenLength < 4 && isMillisecond(value);
 457             default:
 458                 return true;
 459         }
 460     }
 461 
 462     private boolean skipEcmaDelimiter() {
 463         switch (currentField) {
 464             case YEAR:
 465             case MONTH:
 466                 return skipNumberDelimiter('-') || peek() == 'T' || peek() == -1;
 467             case DAY:
 468                 return peek() == 'T' || peek() == -1;
 469             case HOUR:
 470             case MINUTE:
 471                 return skipNumberDelimiter(':') || endOfTime();
 472             case SECOND:
 473                 return skipNumberDelimiter('.') || endOfTime();
 474             default:
 475                 return true;
 476         }
 477     }
 478 
 479     private boolean endOfTime() {
 480         final int c = peek();
 481         return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' ';
 482     }
 483 
 484     private static boolean isAsciiLetter(final char ch) {
 485         return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
 486     }
 487 
 488     private static boolean isAsciiDigit(final char ch) {
 489         return '0' <= ch && ch <= '9';
 490     }
 491 
 492     private int readNumber(final int maxDigits) {
 493         final int start = pos;
 494         int n = 0;
 495         final int max = Math.min(length, pos + maxDigits);
 496         while (pos < max && isAsciiDigit(string.charAt(pos))) {
 497             n = n * 10 + string.charAt(pos++) - '0';
 498         }
 499         tokenLength = pos - start;
 500         return n;
 501     }
 502 
 503     private Name readName() {
 504         final int start = pos;
 505         final int limit = Math.min(pos + 3, length);
 506 
 507         // first read up to the key length
 508         while (pos < limit && isAsciiLetter(string.charAt(pos))) {
 509             pos++;
 510         }
 511         final String key = string.substring(start, pos).toLowerCase(Locale.ENGLISH);
 512         final Name name = names.get(key);
 513         // then advance to end of name
 514         while (pos < length && isAsciiLetter(string.charAt(pos))) {
 515             pos++;
 516         }
 517 
 518         tokenLength = pos - start;
 519         // make sure we have the full name or a prefix
 520         if (name != null && name.matches(string, start, tokenLength)) {
 521             return name;
 522         }
 523         return null;
 524     }
 525 
 526     private int readTimeZoneOffset() {
 527         final int sign = string.charAt(pos - 1) == '+' ? 1 : -1;
 528         int offset = readNumber(2);
 529         skipDelimiter(':');
 530         offset = offset * 60 + readNumber(2);
 531         return sign * offset;
 532     }
 533 
 534     private boolean skipParentheses() {
 535         int parenCount = 1;
 536         while (pos < length && parenCount != 0) {
 537             final char c = string.charAt(pos++);
 538             if (c == '(') {
 539                 parenCount++;
 540             } else if (c == ')') {
 541                 parenCount--;
 542             }
 543         }
 544         return true;
 545     }
 546 
 547     private static int getDefaultValue(final int field) {
 548         switch (field) {
 549             case MONTH:
 550             case DAY:
 551                 return 1;
 552             default:
 553                 return 0;
 554         }
 555     }
 556 
 557     private static boolean isDay(final int n) {
 558         return 1 <= n && n <= 31;
 559     }
 560 
 561     private static boolean isMonth(final int n) {
 562         return 1 <= n && n <= 12;
 563     }
 564 
 565     private static boolean isHour(final int n) {
 566         return 0 <= n && n <= 24;
 567     }
 568 
 569     private static boolean isMinuteOrSecond(final int n) {
 570         return 0 <= n && n < 60;
 571     }
 572 
 573     private static boolean isMillisecond(final int n) {
 574         return 0<= n && n < 1000;
 575     }
 576 
 577     private boolean setMonth(final int m) {
 578         if (!isSet(MONTH)) {
 579             namedMonth = true;
 580             set(MONTH, m);
 581             return true;
 582         }
 583         return false;
 584     }
 585 
 586     private boolean setDateField(final int n) {
 587         for (int field = YEAR; field != HOUR; field++) {
 588             if (!isSet(field)) {
 589                 // no validation on legacy date fields
 590                 set(field, n);
 591                 return true;
 592             }
 593         }
 594         return false;
 595     }
 596 
 597     private boolean setTimeField(final int n) {
 598         for (int field = HOUR; field != TIMEZONE; field++) {
 599             if (!isSet(field)) {
 600                 if (checkLegacyField(field, n)) {
 601                     set(field, n);
 602                     return true;
 603                 }
 604                 return false;
 605             }
 606         }
 607         return false;
 608     }
 609 
 610     private boolean setTimezone(final int offset, final boolean asNumericOffset) {
 611         if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) {
 612             set(TIMEZONE, offset);
 613             return true;
 614         }
 615         return false;
 616     }
 617 
 618     private boolean setAmPm(final int offset) {
 619         if (!isSet(HOUR)) {
 620             return false;
 621         }
 622         final int hour = get(HOUR);
 623         if (hour >= 0 && hour <= 12) {
 624             set(HOUR, hour + offset);
 625         }
 626         return true;
 627     }
 628 
 629     private boolean patchResult(final boolean strict) {
 630         // sanity checks - make sure we have something
 631         if (!isSet(YEAR) && !isSet(HOUR)) {
 632             return false;
 633         }
 634         if (isSet(HOUR) && !isSet(MINUTE)) {
 635             return false;
 636         }
 637         // fill in default values for unset fields except timezone
 638         for (int field = YEAR; field <= TIMEZONE; field++) {
 639             if (get(field) == null) {
 640                 if (field == TIMEZONE && !strict) {
 641                     // We only use UTC as default timezone for dates parsed complying with
 642                     // the format specified in ES5 15.9.1.15. Otherwise the slot is left empty
 643                     // and local timezone is used.
 644                     continue;
 645                 }
 646                 final int value = getDefaultValue(field);
 647                 set(field, value);
 648             }
 649         }
 650 
 651         if (!strict) {
 652             // swap year, month, and day if it looks like the right thing to do
 653             if (isDay(get(YEAR))) {
 654                 final int d = get(YEAR);
 655                 set(YEAR, get(DAY));
 656                 if (namedMonth) {
 657                     // d-m-y
 658                     set(DAY, d);
 659                 } else {
 660                     // m-d-y
 661                     final int d2 = get(MONTH);
 662                     set(MONTH, d);
 663                     set(DAY, d2);
 664                 }
 665             }
 666             // sanity checks now that we know what's what
 667             if (!isMonth(get(MONTH)) || !isDay(get(DAY))) {
 668                 return false;
 669             }
 670 
 671             // add 1900 or 2000 to year if it's between 0 and 100
 672             final int year = get(YEAR);
 673             if (year >= 0 && year < 100) {
 674                 set(YEAR, year >= 50 ? 1900 + year : 2000 + year);
 675             }
 676         } else {
 677             // 24 hour value is only allowed if all other time values are zero
 678             if (get(HOUR) == 24 &&
 679                     (get(MINUTE) != 0 || get(SECOND) != 0 || get(MILLISECOND) != 0)) {
 680                 return false;
 681             }
 682         }
 683 
 684         // set month to 0-based
 685         set(MONTH, get(MONTH) - 1);
 686         return true;
 687     }
 688 
 689     private static void addName(final String str, final int type, final int value) {
 690         final Name name = new Name(str, type, value);
 691         names.put(name.key, name);
 692     }
 693 
 694     private static class Name {
 695         final String name;
 696         final String key;
 697         final int value;
 698         final int type;
 699 
 700         final static int DAY_OF_WEEK    = -1;
 701         final static int MONTH_NAME     = 0;
 702         final static int AM_PM          = 1;
 703         final static int TIMEZONE_ID    = 2;
 704         final static int TIME_SEPARATOR = 3;
 705 
 706         Name(final String name, final int type, final int value) {
 707             assert name != null;
 708             assert name.equals(name.toLowerCase(Locale.ENGLISH));
 709 
 710             this.name = name;
 711             // use first three characters as lookup key
 712             this.key = name.substring(0, Math.min(3, name.length()));
 713             this.type = type;
 714             this.value = value;
 715         }
 716 
 717         public boolean matches(final String str, final int offset, final int len) {
 718             return name.regionMatches(true, 0, str, offset, len);
 719         }
 720 
 721         @Override
 722         public String toString() {
 723             return name;
 724         }
 725     }
 726 
 727 }