1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.objects;
  27 
  28 import static java.lang.Character.DECIMAL_DIGIT_NUMBER;
  29 import static java.lang.Character.LOWERCASE_LETTER;
  30 import static java.lang.Character.OTHER_PUNCTUATION;
  31 import static java.lang.Character.SPACE_SEPARATOR;
  32 import static java.lang.Character.UPPERCASE_LETTER;
  33 
  34 import java.util.HashMap;
  35 import java.util.Locale;
  36 
  37 /**
  38  * JavaScript date parser. This class first tries to parse a date string
  39  * according to the extended ISO 8601 format specified in ES5 15.9.1.15.
  40  * If that fails, it falls back to legacy mode in which it accepts a range
  41  * of different formats.
  42  *
  43  * <p>This class is neither thread-safe nor reusable. Calling the
  44  * <tt>parse()</tt> method more than once will yield undefined results.</p>
  45  */
  46 public class DateParser {
  47 
  48     /** Constant for index position of parsed year value. */
  49     public final static int YEAR        = 0;
  50     /** Constant for index position of parsed month value. */
  51     public final static int MONTH       = 1;
  52     /** Constant for index position of parsed day value. */
  53     public final static int DAY         = 2;
  54     /** Constant for index position of parsed hour value. */
  55     public final static int HOUR        = 3;
  56     /** Constant for index position of parsed minute value. */
  57     public final static int MINUTE      = 4;
  58     /** Constant for index position of parsed second value. */
  59     public final static int SECOND      = 5;
  60     /** Constant for index position of parsed millisecond value. */
  61     public final static int MILLISECOND = 6;
  62     /** Constant for index position of parsed time zone offset value. */
  63     public final static int TIMEZONE    = 7;
  64 
  65     private enum Token {
  66         UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END
  67     }
  68 
  69     private final String string;
  70     private final int length;
  71     private final Integer[] fields;
  72     private int pos = 0;
  73     private Token token;
  74     private int tokenLength;
  75     private Name nameValue;
  76     private int numValue;
  77     private int currentField = YEAR;
  78     private int yearSign = 0;
  79     private boolean namedMonth = false;
  80 
  81     private final static HashMap<String,Name> names = new HashMap<>();
  82 
  83     static {
  84         addName("monday", Name.DAY_OF_WEEK, 0);
  85         addName("tuesday", Name.DAY_OF_WEEK, 0);
  86         addName("wednesday", Name.DAY_OF_WEEK, 0);
  87         addName("thursday", Name.DAY_OF_WEEK, 0);
  88         addName("friday", Name.DAY_OF_WEEK, 0);
  89         addName("saturday", Name.DAY_OF_WEEK, 0);
  90         addName("sunday", Name.DAY_OF_WEEK, 0);
  91         addName("january", Name.MONTH_NAME, 1);
  92         addName("february", Name.MONTH_NAME, 2);
  93         addName("march", Name.MONTH_NAME, 3);
  94         addName("april", Name.MONTH_NAME, 4);
  95         addName("may", Name.MONTH_NAME, 5);
  96         addName("june", Name.MONTH_NAME, 6);
  97         addName("july", Name.MONTH_NAME, 7);
  98         addName("august", Name.MONTH_NAME, 8);
  99         addName("september", Name.MONTH_NAME, 9);
 100         addName("october", Name.MONTH_NAME, 10);
 101         addName("november", Name.MONTH_NAME, 11);
 102         addName("december", Name.MONTH_NAME, 12);
 103         addName("am", Name.AM_PM, 0);
 104         addName("pm", Name.AM_PM, 12);
 105         addName("z", Name.TIMEZONE_ID, 0);
 106         addName("gmt", Name.TIMEZONE_ID, 0);
 107         addName("ut", Name.TIMEZONE_ID, 0);
 108         addName("utc", Name.TIMEZONE_ID, 0);
 109         addName("est", Name.TIMEZONE_ID, -5 * 60);
 110         addName("edt", Name.TIMEZONE_ID, -4 * 60);
 111         addName("cst", Name.TIMEZONE_ID, -6 * 60);
 112         addName("cdt", Name.TIMEZONE_ID, -5 * 60);
 113         addName("mst", Name.TIMEZONE_ID, -7 * 60);
 114         addName("mdt", Name.TIMEZONE_ID, -6 * 60);
 115         addName("pst", Name.TIMEZONE_ID, -8 * 60);
 116         addName("pdt", Name.TIMEZONE_ID, -7 * 60);
 117         addName("t", Name.TIME_SEPARATOR, 0);
 118     }
 119 
 120     /**
 121      * Construct a new <tt>DateParser</tt> instance for parsing the given string.
 122      * @param string the string to be parsed
 123      */
 124     public DateParser(final String string) {
 125         this.string = string;
 126         this.length = string.length();
 127         this.fields = new Integer[TIMEZONE + 1];
 128     }
 129 
 130     /**
 131      * Try parsing the given string as date according to the extended ISO 8601 format
 132      * specified in ES5 15.9.1.15. Fall back to legacy mode if that fails.
 133      * This method returns <tt>true</tt> if the string could be parsed.
 134      * @return true if the string could be parsed as date
 135      */
 136     public boolean parse() {
 137         return parseEcmaDate() || parseLegacyDate();
 138     }
 139 
 140     /**
 141      * Try parsing the date string according to the rules laid out in ES5 15.9.1.15.
 142      * The date string must conform to the following format:
 143      *
 144      * <pre>  [('-'|'+')yy]yyyy[-MM[-dd]][hh:mm[:ss[.sss]][Z|(+|-)hh:mm]] </pre>
 145      *
 146      * <p>If the string does not contain a time zone offset, the <tt>TIMEZONE</tt> field
 147      * is set to <tt>0</tt> (GMT).</p>
 148      * @return true if string represents a valid ES5 date string.
 149      */
 150     public boolean parseEcmaDate() {
 151 
 152         if (token == null) {
 153             token = next();
 154         }
 155 
 156         while (token != Token.END) {
 157 
 158             switch (token) {
 159                 case NUMBER:
 160                     if (currentField == YEAR && yearSign != 0) {
 161                         // 15.9.1.15.1 Extended year must have six digits
 162                         if (tokenLength != 6) {
 163                             return false;
 164                         }
 165                         numValue *= yearSign;
 166                     } else if (!checkEcmaField(currentField, numValue)) {
 167                         return false;
 168                     }
 169                     if (!skipEcmaDelimiter()) {
 170                         return false;
 171                     }
 172                     if (currentField < TIMEZONE) {
 173                         set(currentField++, numValue);
 174                     }
 175                     break;
 176 
 177                 case NAME:
 178                     if (nameValue == null) {
 179                         return false;
 180                     }
 181                     switch (nameValue.type) {
 182                         case Name.TIME_SEPARATOR:
 183                             if (currentField == YEAR || currentField > HOUR) {
 184                                 return false;
 185                             }
 186                             currentField = HOUR;
 187                             break;
 188                         case Name.TIMEZONE_ID:
 189                             if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) {
 190                                 return false;
 191                             }
 192                             break;
 193                         default:
 194                             return false;
 195                     }
 196                     break;
 197 
 198                 case SIGN:
 199                     if (currentField == YEAR) {
 200                         yearSign = numValue;
 201                     } else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) {
 202                         // Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds
 203                         return false;
 204                     }
 205                     break;
 206 
 207                 default:
 208                     return false;
 209             }
 210             token = next();
 211         }
 212 
 213         return patchResult(true);
 214     }
 215 
 216     /**
 217      * Try parsing the date using a fuzzy algorithm that can handle a variety of formats.
 218      *
 219      * <p>Numbers separated by <tt>':'</tt> are treated as time values, optionally followed by a
 220      * millisecond value separated by <tt>'.'</tt>. Other number values are treated as date values.
 221      * The exact sequence of day, month, and year values to apply is determined heuristically.</p>
 222      *
 223      * <p>English month names and selected time zone names as well as AM/PM markers are recognized
 224      * and handled properly. Additionally, numeric time zone offsets such as <tt>(+|-)hh:mm</tt> or
 225      * <tt>(+|-)hhmm</tt> are recognized. If the string does not contain a time zone offset
 226      * the <tt>TIMEZONE</tt>field is left undefined, meaning the local time zone should be applied.</p>
 227      *
 228      * <p>English weekday names are recognized but ignored. All text in parentheses is ignored as well.
 229      * All other text causes parsing to fail.</p>
 230      *
 231      * @return true if the string could be parsed
 232      */
 233     public boolean parseLegacyDate() {
 234 
 235         if (yearSign != 0 || currentField > DAY) {
 236             // we don't support signed years in legacy mode
 237             return false;
 238         }
 239         if (token == null) {
 240             token = next();
 241         }
 242 
 243         while (token != Token.END) {
 244 
 245             switch (token) {
 246                 case NUMBER:
 247                     if (skip(':')) {
 248                         // A number followed by ':' is parsed as time
 249                         if (!setTimeField(numValue)) {
 250                             return false;
 251                         }
 252                         // consume remaining time tokens
 253                         do {
 254                             token = next();
 255                             if (token != Token.NUMBER || !setTimeField(numValue)) {
 256                                 return false;
 257                             }
 258                         } while (skip(isSet(SECOND) ? '.' : ':'));
 259 
 260                     } else {
 261                         // Parse as date token
 262                         if (!setDateField(numValue)) {
 263                             return false;
 264                         }
 265                         skip('-');
 266                     }
 267                     break;
 268 
 269                 case NAME:
 270                     if (nameValue == null) {
 271                         return false;
 272                     }
 273                     switch (nameValue.type) {
 274                         case Name.AM_PM:
 275                             if (!setAmPm(nameValue.value)) {
 276                                 return false;
 277                             }
 278                             break;
 279                         case Name.MONTH_NAME:
 280                             if (!setMonth(nameValue.value)) {
 281                                 return false;
 282                             }
 283                             break;
 284                         case Name.TIMEZONE_ID:
 285                             if (!setTimezone(nameValue.value, false)) {
 286                                 return false;
 287                             }
 288                             break;
 289                         case Name.TIME_SEPARATOR:
 290                             return false;
 291                         default:
 292                             break;
 293                     }
 294                     if (nameValue.type != Name.TIMEZONE_ID) {
 295                         skip('-');
 296                     }
 297                     break;
 298 
 299                 case SIGN:
 300                     if (!setTimezone(readTimeZoneOffset(), true)) {
 301                         return false;
 302                     }
 303                     break;
 304 
 305                 case PARENTHESIS:
 306                     if (!skipParentheses()) {
 307                         return false;
 308                     }
 309                     break;
 310 
 311                 case SEPARATOR:
 312                     break;
 313 
 314                 default:
 315                     return false;
 316             }
 317             token = next();
 318         }
 319 
 320         return patchResult(false);
 321     }
 322 
 323     /**
 324      * Get the parsed date and time fields as an array of <tt>Integers</tt>.
 325      *
 326      * <p>If parsing was successful, all fields are guaranteed to be set except for the
 327      * <tt>TIMEZONE</tt> field which may be <tt>null</tt>, meaning that local time zone
 328      * offset should be applied.</p>
 329      *
 330      * @return the parsed date fields
 331      */
 332     public Integer[] getDateFields() {
 333         return fields;
 334     }
 335 
 336     private boolean isSet(final int field) {
 337         return fields[field] != null;
 338     }
 339 
 340     private Integer get(final int field) {
 341         return fields[field];
 342     }
 343 
 344     private void set(final int field, final int value) {
 345         fields[field] = value;
 346     }
 347 
 348     private int peek() {
 349         return pos < length ? string.charAt(pos) : -1;
 350     }
 351 
 352     private boolean skip(final char c) {
 353         if (pos < length && string.charAt(pos) == c) {
 354             token = null;
 355             pos++;
 356             return true;
 357         }
 358         return false;
 359     }
 360 
 361     private Token next() {
 362         if (pos >= length) {
 363             tokenLength = 0;
 364             return Token.END;
 365         }
 366 
 367         final char c = string.charAt(pos);
 368 
 369         if (c > 0x80) {
 370             tokenLength = 1;
 371             pos++;
 372             return Token.UNKNOWN; // We only deal with ASCII here
 373         }
 374 
 375         final int type = Character.getType(c);
 376         switch (type) {
 377             case DECIMAL_DIGIT_NUMBER:
 378                 numValue = readNumber(6);
 379                 return Token.NUMBER;
 380             case SPACE_SEPARATOR :
 381             case OTHER_PUNCTUATION:
 382                 tokenLength = 1;
 383                 pos++;
 384                 return Token.SEPARATOR;
 385             case UPPERCASE_LETTER:
 386             case LOWERCASE_LETTER:
 387                 nameValue = readName();
 388                 return Token.NAME;
 389             default:
 390                 tokenLength = 1;
 391                 pos++;
 392                 switch (c) {
 393                     case '(':
 394                         return Token.PARENTHESIS;
 395                     case '-':
 396                     case '+':
 397                         numValue = c == '-' ? -1 : 1;
 398                         return Token.SIGN;
 399                     default:
 400                         return Token.UNKNOWN;
 401                 }
 402         }
 403     }
 404 
 405     private static boolean checkLegacyField(final int field, final int value) {
 406         switch (field) {
 407             case HOUR:
 408                 return isHour(value);
 409             case MINUTE:
 410             case SECOND:
 411                 return isMinuteOrSecond(value);
 412             case MILLISECOND:
 413                 return isMillisecond(value);
 414             default:
 415                 // skip validation on other legacy fields as we don't know what's what
 416                 return true;
 417         }
 418     }
 419 
 420     private boolean checkEcmaField(final int field, final int value) {
 421         switch (field) {
 422             case YEAR:
 423                 return tokenLength == 4;
 424             case MONTH:
 425                 return tokenLength == 2 && isMonth(value);
 426             case DAY:
 427                 return tokenLength == 2 && isDay(value);
 428             case HOUR:
 429                 return tokenLength == 2 && isHour(value);
 430             case MINUTE:
 431             case SECOND:
 432                 return tokenLength == 2 && isMinuteOrSecond(value);
 433             case MILLISECOND:
 434                 // we allow millisecond to be less than 3 digits
 435                 return tokenLength < 4 && isMillisecond(value);
 436             default:
 437                 return true;
 438         }
 439     }
 440 
 441     private boolean skipEcmaDelimiter() {
 442         switch (currentField) {
 443             case YEAR:
 444             case MONTH:
 445                 return skip('-') || peek() == 'T' || peek() == -1;
 446             case DAY:
 447                 return peek() == 'T' || peek() == -1;
 448             case HOUR:
 449             case MINUTE:
 450                 return skip(':') || endOfTime();
 451             case SECOND:
 452                 return skip('.') || endOfTime();
 453             default:
 454                 return true;
 455         }
 456     }
 457 
 458     private boolean endOfTime() {
 459         final int c = peek();
 460         return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' ';
 461     }
 462 
 463     private static boolean isAsciiLetter(final char ch) {
 464         return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
 465     }
 466 
 467     private static boolean isAsciiDigit(final char ch) {
 468         return '0' <= ch && ch <= '9';
 469     }
 470 
 471     private int readNumber(final int maxDigits) {
 472         final int start = pos;
 473         int n = 0;
 474         final int max = Math.min(length, pos + maxDigits);
 475         while (pos < max && isAsciiDigit(string.charAt(pos))) {
 476             n = n * 10 + string.charAt(pos++) - '0';
 477         }
 478         tokenLength = pos - start;
 479         return n;
 480     }
 481 
 482     private Name readName() {
 483         final int start = pos;
 484         final int limit = Math.min(pos + 3, length);
 485 
 486         // first read up to the key length
 487         while (pos < limit && isAsciiLetter(string.charAt(pos))) {
 488             pos++;
 489         }
 490         final String key = string.substring(start, pos).toLowerCase(Locale.ENGLISH);
 491         final Name name = names.get(key);
 492         // then advance to end of name
 493         while (pos < length && isAsciiLetter(string.charAt(pos))) {
 494             pos++;
 495         }
 496 
 497         tokenLength = pos - start;
 498         // make sure we have the full name or a prefix
 499         if (name != null && name.matches(string, start, tokenLength)) {
 500             return name;
 501         }
 502         return null;
 503     }
 504 
 505     private int readTimeZoneOffset() {
 506         final int sign = string.charAt(pos - 1) == '+' ? 1 : -1;
 507         int offset = readNumber(2);
 508         skip(':');
 509         offset = offset * 60 + readNumber(2);
 510         return sign * offset;
 511     }
 512 
 513     private boolean skipParentheses() {
 514         int parenCount = 1;
 515         while (pos < length && parenCount != 0) {
 516             final char c = string.charAt(pos++);
 517             if (c == '(') {
 518                 parenCount++;
 519             } else if (c == ')') {
 520                 parenCount--;
 521             }
 522         }
 523         return true;
 524     }
 525 
 526     private static int getDefaultValue(final int field) {
 527         switch (field) {
 528             case MONTH:
 529             case DAY:
 530                 return 1;
 531             default:
 532                 return 0;
 533         }
 534     }
 535 
 536     private static boolean isDay(final int n) {
 537         return 1 <= n && n <= 31;
 538     }
 539 
 540     private static boolean isMonth(final int n) {
 541         return 1 <= n && n <= 12;
 542     }
 543 
 544     private static boolean isHour(final int n) {
 545         return 0 <= n && n <= 24;
 546     }
 547 
 548     private static boolean isMinuteOrSecond(final int n) {
 549         return 0 <= n && n < 60;
 550     }
 551 
 552     private static boolean isMillisecond(final int n) {
 553         return 0<= n && n < 1000;
 554     }
 555 
 556     private boolean setMonth(final int m) {
 557         if (!isSet(MONTH)) {
 558             namedMonth = true;
 559             set(MONTH, m);
 560             return true;
 561         }
 562         return false;
 563     }
 564 
 565     private boolean setDateField(final int n) {
 566         for (int field = YEAR; field != HOUR; field++) {
 567             if (!isSet(field)) {
 568                 // no validation on legacy date fields
 569                 set(field, n);
 570                 return true;
 571             }
 572         }
 573         return false;
 574     }
 575 
 576     private boolean setTimeField(final int n) {
 577         for (int field = HOUR; field != TIMEZONE; field++) {
 578             if (!isSet(field)) {
 579                 if (checkLegacyField(field, n)) {
 580                     set(field, n);
 581                     return true;
 582                 }
 583                 return false;
 584             }
 585         }
 586         return false;
 587     }
 588 
 589     private boolean setTimezone(final int offset, final boolean asNumericOffset) {
 590         if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) {
 591             set(TIMEZONE, offset);
 592             return true;
 593         }
 594         return false;
 595     }
 596 
 597     private boolean setAmPm(final int offset) {
 598         if (!isSet(HOUR)) {
 599             return false;
 600         }
 601         final int hour = get(HOUR);
 602         if (hour >= 0 && hour <= 12) {
 603             set(HOUR, hour + offset);
 604         }
 605         return true;
 606     }
 607 
 608     private boolean patchResult(final boolean strict) {
 609         // sanity checks - make sure we have something
 610         if (!isSet(YEAR) && !isSet(HOUR)) {
 611             return false;
 612         }
 613         if (isSet(HOUR) && !isSet(MINUTE)) {
 614             return false;
 615         }
 616         // fill in default values for unset fields except timezone
 617         for (int field = YEAR; field <= TIMEZONE; field++) {
 618             if (get(field) == null) {
 619                 if (field == TIMEZONE && !strict) {
 620                     // We only use UTC as default timezone for dates parsed complying with
 621                     // the format specified in ES5 15.9.1.15. Otherwise the slot is left empty
 622                     // and local timezone is used.
 623                     continue;
 624                 }
 625                 final int value = getDefaultValue(field);
 626                 set(field, value);
 627             }
 628         }
 629 
 630         if (!strict) {
 631             // swap year, month, and day if it looks like the right thing to do
 632             if (isDay(get(YEAR))) {
 633                 final int d = get(YEAR);
 634                 set(YEAR, get(DAY));
 635                 if (namedMonth) {
 636                     // d-m-y
 637                     set(DAY, d);
 638                 } else {
 639                     // m-d-y
 640                     final int d2 = get(MONTH);
 641                     set(MONTH, d);
 642                     set(DAY, d2);
 643                 }
 644             }
 645             // sanity checks now that we know what's what
 646             if (!isMonth(get(MONTH)) || !isDay(get(DAY))) {
 647                 return false;
 648             }
 649 
 650             // add 1900 or 2000 to year if it's between 0 and 100
 651             final int year = get(YEAR);
 652             if (year >= 0 && year < 100) {
 653                 set(YEAR, year >= 50 ? 1900 + year : 2000 + year);
 654             }
 655         } else {
 656             // 24 hour value is only allowed if all other time values are zero
 657             if (get(HOUR) == 24 &&
 658                     (get(MINUTE) != 0 || get(SECOND) != 0 || get(MILLISECOND) != 0)) {
 659                 return false;
 660             }
 661         }
 662 
 663         // set month to 0-based
 664         set(MONTH, get(MONTH) - 1);
 665         return true;
 666     }
 667 
 668     private static void addName(final String str, final int type, final int value) {
 669         final Name name = new Name(str, type, value);
 670         names.put(name.key, name);
 671     }
 672 
 673     private static class Name {
 674         final String name;
 675         final String key;
 676         final int value;
 677         final int type;
 678 
 679         final static int DAY_OF_WEEK    = -1;
 680         final static int MONTH_NAME     = 0;
 681         final static int AM_PM          = 1;
 682         final static int TIMEZONE_ID    = 2;
 683         final static int TIME_SEPARATOR = 3;
 684 
 685         Name(final String name, final int type, final int value) {
 686             assert name != null;
 687             assert name.equals(name.toLowerCase(Locale.ENGLISH));
 688 
 689             this.name = name;
 690             // use first three characters as lookup key
 691             this.key = name.substring(0, Math.min(3, name.length()));
 692             this.type = type;
 693             this.value = value;
 694         }
 695 
 696         public boolean matches(final String str, final int offset, final int len) {
 697             return name.regionMatches(true, 0, str, offset, len);
 698         }
 699 
 700         @Override
 701         public String toString() {
 702             return name;
 703         }
 704     }
 705 
 706 }