1 /*
   2  * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package jdk.nashorn.internal.objects;
  27 
import static java.lang.Character.DECIMAL_DIGIT_NUMBER;
import static java.lang.Character.LOWERCASE_LETTER;
import static java.lang.Character.OTHER_PUNCTUATION;
import static java.lang.Character.SPACE_SEPARATOR;
import static java.lang.Character.UPPERCASE_LETTER;

import java.util.HashMap;
import java.util.Locale;

  35 
  36 /**
  37  * JavaScript date parser. This class first tries to parse a date string
  38  * according to the extended ISO 8601 format specified in ES5 15.9.1.15.
  39  * If that fails, it falls back to legacy mode in which it accepts a range
  40  * of different formats.
  41  *
  42  * <p>This class is neither thread-safe nor reusable. Calling the
  43  * <tt>parse()</tt> method more than once will yield undefined results.</p>
  44  */
  45 public class DateParser {
  46 
  47     /** Constant for index position of parsed year value. */
  48     public final static int YEAR        = 0;
  49     /** Constant for index position of parsed month value. */
  50     public final static int MONTH       = 1;
  51     /** Constant for index position of parsed day value. */
  52     public final static int DAY         = 2;
  53     /** Constant for index position of parsed hour value. */
  54     public final static int HOUR        = 3;
  55     /** Constant for index position of parsed minute value. */
  56     public final static int MINUTE      = 4;
  57     /** Constant for index position of parsed second value. */
  58     public final static int SECOND      = 5;
  59     /** Constant for index position of parsed millisecond value. */
  60     public final static int MILLISECOND = 6;
  61     /** Constant for index position of parsed time zone offset value. */
  62     public final static int TIMEZONE    = 7;
  63 
  64     private enum Token {
  65         UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END
  66     }
  67 
  68     private final String string;
  69     private final int length;
  70     private final Integer[] fields;
  71     private int pos = 0;
  72     private Token token;
  73     private int tokenLength;
  74     private Name nameValue;
  75     private int numValue;
  76     private int currentField = YEAR;
  77     private int yearSign = 0;
  78     private boolean namedMonth = false;
  79 
  80     private final static HashMap<String,Name> names = new HashMap<>();
  81 
  82     static {
  83         addName("monday", Name.DAY_OF_WEEK, 0);
  84         addName("tuesday", Name.DAY_OF_WEEK, 0);
  85         addName("wednesday", Name.DAY_OF_WEEK, 0);
  86         addName("thursday", Name.DAY_OF_WEEK, 0);
  87         addName("friday", Name.DAY_OF_WEEK, 0);
  88         addName("saturday", Name.DAY_OF_WEEK, 0);
  89         addName("sunday", Name.DAY_OF_WEEK, 0);
  90         addName("january", Name.MONTH_NAME, 1);
  91         addName("february", Name.MONTH_NAME, 2);
  92         addName("march", Name.MONTH_NAME, 3);
  93         addName("april", Name.MONTH_NAME, 4);
  94         addName("may", Name.MONTH_NAME, 5);
  95         addName("june", Name.MONTH_NAME, 6);
  96         addName("july", Name.MONTH_NAME, 7);
  97         addName("august", Name.MONTH_NAME, 8);
  98         addName("september", Name.MONTH_NAME, 9);
  99         addName("october", Name.MONTH_NAME, 10);
 100         addName("november", Name.MONTH_NAME, 11);
 101         addName("december", Name.MONTH_NAME, 12);
 102         addName("am", Name.AM_PM, 0);
 103         addName("pm", Name.AM_PM, 12);
 104         addName("z", Name.TIMEZONE_ID, 0);
 105         addName("gmt", Name.TIMEZONE_ID, 0);
 106         addName("ut", Name.TIMEZONE_ID, 0);
 107         addName("utc", Name.TIMEZONE_ID, 0);
 108         addName("est", Name.TIMEZONE_ID, -5 * 60);
 109         addName("edt", Name.TIMEZONE_ID, -4 * 60);
 110         addName("cst", Name.TIMEZONE_ID, -6 * 60);
 111         addName("cdt", Name.TIMEZONE_ID, -5 * 60);
 112         addName("mst", Name.TIMEZONE_ID, -7 * 60);
 113         addName("mdt", Name.TIMEZONE_ID, -6 * 60);
 114         addName("pst", Name.TIMEZONE_ID, -8 * 60);
 115         addName("pdt", Name.TIMEZONE_ID, -7 * 60);
 116         addName("t", Name.TIME_SEPARATOR, 0);
 117     }
 118 
 119     /**
 120      * Construct a new <tt>DateParser</tt> instance for parsing the given string.
 121      * @param string the string to be parsed
 122      */
 123     public DateParser(final String string) {
 124         this.string = string;
 125         this.length = string.length();
 126         this.fields = new Integer[TIMEZONE + 1];
 127     }
 128 
 129     /**
 130      * Try parsing the given string as date according to the extended ISO 8601 format
 131      * specified in ES5 15.9.1.15. Fall back to legacy mode if that fails.
 132      * This method returns <tt>true</tt> if the string could be parsed.
 133      * @return true if the string could be parsed as date
 134      */
 135     public boolean parse() {
 136         return parseEcmaDate() || parseLegacyDate();
 137     }
 138 
 139     /**
 140      * Try parsing the date string according to the rules laid out in ES5 15.9.1.15.
 141      * The date string must conform to the following format:
 142      *
 143      * <pre>  [('-'|'+')yy]yyyy[-MM[-dd]][hh:mm[:ss[.sss]][Z|(+|-)hh:mm]] </pre>
 144      *
 145      * <p>If the string does not contain a time zone offset, the <tt>TIMEZONE</tt> field
 146      * is set to <tt>0</tt> (GMT).</p>
 147      * @return true if string represents a valid ES5 date string.
 148      */
 149     public boolean parseEcmaDate() {
 150 
 151         if (token == null) {
 152             token = next();
 153         }
 154 
 155         while (token != Token.END) {
 156 
 157             switch (token) {
 158                 case NUMBER:
 159                     if (currentField == YEAR && yearSign != 0) {
 160                         // 15.9.1.15.1 Extended year must have six digits
 161                         if (tokenLength != 6) {
 162                             return false;
 163                         }
 164                         numValue *= yearSign;
 165                     } else if (!checkEcmaField(currentField, numValue)) {
 166                         return false;
 167                     }
 168                     if (!skipEcmaDelimiter()) {
 169                         return false;
 170                     }
 171                     if (currentField < TIMEZONE) {
 172                         set(currentField++, numValue);
 173                     }
 174                     break;
 175 
 176                 case NAME:
 177                     if (nameValue == null) {
 178                         return false;
 179                     }
 180                     switch (nameValue.type) {
 181                         case Name.TIME_SEPARATOR:
 182                             if (currentField == YEAR || currentField > HOUR) {
 183                                 return false;
 184                             }
 185                             currentField = HOUR;
 186                             break;
 187                         case Name.TIMEZONE_ID:
 188                             if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) {
 189                                 return false;
 190                             }
 191                             break;
 192                         default:
 193                             return false;
 194                     }
 195                     break;
 196 
 197                 case SIGN:
 198                     if (currentField == YEAR) {
 199                         yearSign = numValue;
 200                     } else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) {
 201                         // Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds
 202                         return false;
 203                     }
 204                     break;
 205 
 206                 default:
 207                     return false;
 208             }
 209             token = next();
 210         }
 211 
 212         return patchResult(true);
 213     }
 214 
 215     /**
 216      * Try parsing the date using a fuzzy algorithm that can handle a variety of formats.
 217      *
 218      * <p>Numbers separated by <tt>':'</tt> are treated as time values, optionally followed by a
 219      * millisecond value separated by <tt>'.'</tt>. Other number values are treated as date values.
 220      * The exact sequence of day, month, and year values to apply is determined heuristically.</p>
 221      *
 222      * <p>English month names and selected time zone names as well as AM/PM markers are recognized
 223      * and handled properly. Additionally, numeric time zone offsets such as <tt>(+|-)hh:mm</tt> or
 224      * <tt>(+|-)hhmm</tt> are recognized. If the string does not contain a time zone offset
 225      * the <tt>TIMEZONE</tt>field is left undefined, meaning the local time zone should be applied.</p>
 226      *
 227      * <p>English weekday names are recognized but ignored. All text in parentheses is ignored as well.
 228      * All other text causes parsing to fail.</p>
 229      *
 230      * @return true if the string could be parsed
 231      */
 232     public boolean parseLegacyDate() {
 233 
 234         if (yearSign != 0 || currentField > DAY) {
 235             // we don't support signed years in legacy mode
 236             return false;
 237         }
 238         if (token == null) {
 239             token = next();
 240         }
 241 
 242         while (token != Token.END) {
 243 
 244             switch (token) {
 245                 case NUMBER:
 246                     if (skip(':')) {
 247                         // A number followed by ':' is parsed as time
 248                         if (!setTimeField(numValue)) {
 249                             return false;
 250                         }
 251                         // consume remaining time tokens
 252                         do {
 253                             token = next();
 254                             if (token != Token.NUMBER || !setTimeField(numValue)) {
 255                                 return false;
 256                             }
 257                         } while (skip(isSet(SECOND) ? '.' : ':'));
 258 
 259                     } else {
 260                         // Parse as date token
 261                         if (!setDateField(numValue)) {
 262                             return false;
 263                         }
 264                         skip('-');
 265                     }
 266                     break;
 267 
 268                 case NAME:
 269                     if (nameValue == null) {
 270                         return false;
 271                     }
 272                     switch (nameValue.type) {
 273                         case Name.AM_PM:
 274                             if (!setAmPm(nameValue.value)) {
 275                                 return false;
 276                             }
 277                             break;
 278                         case Name.MONTH_NAME:
 279                             if (!setMonth(nameValue.value)) {
 280                                 return false;
 281                             }
 282                             break;
 283                         case Name.TIMEZONE_ID:
 284                             if (!setTimezone(nameValue.value, false)) {
 285                                 return false;
 286                             }
 287                             break;
 288                         case Name.TIME_SEPARATOR:
 289                             return false;
 290                         default:
 291                             break;
 292                     }
 293                     if (nameValue.type != Name.TIMEZONE_ID) {
 294                         skip('-');
 295                     }
 296                     break;
 297 
 298                 case SIGN:
 299                     if (!setTimezone(readTimeZoneOffset(), true)) {
 300                         return false;
 301                     }
 302                     break;
 303 
 304                 case PARENTHESIS:
 305                     if (!skipParentheses()) {
 306                         return false;
 307                     }
 308                     break;
 309 
 310                 case SEPARATOR:
 311                     break;
 312 
 313                 default:
 314                     return false;
 315             }
 316             token = next();
 317         }
 318 
 319         return patchResult(false);
 320     }
 321 
 322     /**
 323      * Get the parsed date and time fields as an array of <tt>Integers</tt>.
 324      *
 325      * <p>If parsing was successful, all fields are guaranteed to be set except for the
 326      * <tt>TIMEZONE</tt> field which may be <tt>null</tt>, meaning that local time zone
 327      * offset should be applied.</p>
 328      *
 329      * @return the parsed date fields
 330      */
 331     public Integer[] getDateFields() {
 332         return fields;
 333     }
 334 
 335     private boolean isSet(final int field) {
 336         return fields[field] != null;
 337     }
 338 
 339     private Integer get(final int field) {
 340         return fields[field];
 341     }
 342 
 343     private void set(final int field, final int value) {
 344         fields[field] = value;
 345     }
 346 
 347     private int peek() {
 348         return pos < length ? string.charAt(pos) : -1;
 349     }
 350 
 351     private boolean skip(final char c) {
 352         if (pos < length && string.charAt(pos) == c) {
 353             token = null;
 354             pos++;
 355             return true;
 356         }
 357         return false;
 358     }
 359 
 360     private Token next() {
 361         if (pos >= length) {
 362             tokenLength = 0;
 363             return Token.END;
 364         }
 365 
 366         final char c = string.charAt(pos);
 367 
 368         if (c > 0x80) {
 369             tokenLength = 1;
 370             pos++;
 371             return Token.UNKNOWN; // We only deal with ASCII here
 372         }
 373 
 374         final int type = Character.getType(c);
 375         switch (type) {
 376             case DECIMAL_DIGIT_NUMBER:
 377                 numValue = readNumber(6);
 378                 return Token.NUMBER;
 379             case SPACE_SEPARATOR :
 380             case OTHER_PUNCTUATION:
 381                 tokenLength = 1;
 382                 pos++;
 383                 return Token.SEPARATOR;
 384             case UPPERCASE_LETTER:
 385             case LOWERCASE_LETTER:
 386                 nameValue = readName();
 387                 return Token.NAME;
 388             default:
 389                 tokenLength = 1;
 390                 pos++;
 391                 switch (c) {
 392                     case '(':
 393                         return Token.PARENTHESIS;
 394                     case '-':
 395                     case '+':
 396                         numValue = c == '-' ? -1 : 1;
 397                         return Token.SIGN;
 398                     default:
 399                         return Token.UNKNOWN;
 400                 }
 401         }
 402     }
 403 
 404     private static boolean checkLegacyField(final int field, final int value) {
 405         switch (field) {
 406             case HOUR:
 407                 return isHour(value);
 408             case MINUTE:
 409             case SECOND:
 410                 return isMinuteOrSecond(value);
 411             case MILLISECOND:
 412                 return isMillisecond(value);
 413             default:
 414                 // skip validation on other legacy fields as we don't know what's what
 415                 return true;
 416         }
 417     }
 418 
 419     private boolean checkEcmaField(final int field, final int value) {
 420         switch (field) {
 421             case YEAR:
 422                 return tokenLength == 4;
 423             case MONTH:
 424                 return tokenLength == 2 && isMonth(value);
 425             case DAY:
 426                 return tokenLength == 2 && isDay(value);
 427             case HOUR:
 428                 return tokenLength == 2 && isHour(value);
 429             case MINUTE:
 430             case SECOND:
 431                 return tokenLength == 2 && isMinuteOrSecond(value);
 432             case MILLISECOND:
 433                 // we allow millisecond to be less than 3 digits
 434                 return tokenLength < 4 && isMillisecond(value);
 435             default:
 436                 return true;
 437         }
 438     }
 439 
 440     private boolean skipEcmaDelimiter() {
 441         switch (currentField) {
 442             case YEAR:
 443             case MONTH:
 444                 return skip('-') || peek() == 'T' || peek() == -1;
 445             case DAY:
 446                 return peek() == 'T' || peek() == -1;
 447             case HOUR:
 448             case MINUTE:
 449                 return skip(':') || endOfTime();
 450             case SECOND:
 451                 return skip('.') || endOfTime();
 452             default:
 453                 return true;
 454         }
 455     }
 456 
 457     private boolean endOfTime() {
 458         final int c = peek();
 459         return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' ';
 460     }
 461 
 462     private static boolean isAsciiLetter(final char ch) {
 463         return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
 464     }
 465 
 466     private static boolean isAsciiDigit(final char ch) {
 467         return '0' <= ch && ch <= '9';
 468     }
 469 
 470     private int readNumber(final int maxDigits) {
 471         final int start = pos;
 472         int n = 0;
 473         final int max = Math.min(length, pos + maxDigits);
 474         while (pos < max && isAsciiDigit(string.charAt(pos))) {
 475             n = n * 10 + string.charAt(pos++) - '0';
 476         }
 477         tokenLength = pos - start;
 478         return n;
 479     }
 480 
 481     private Name readName() {
 482         final int start = pos;
 483         final int limit = Math.min(pos + 3, length);
 484 
 485         // first read up to the key length
 486         while (pos < limit && isAsciiLetter(string.charAt(pos))) {
 487             pos++;
 488         }
 489         final String key = string.substring(start, pos).toLowerCase();
 490         final Name name = names.get(key);
 491         // then advance to end of name
 492         while (pos < length && isAsciiLetter(string.charAt(pos))) {
 493             pos++;
 494         }
 495 
 496         tokenLength = pos - start;
 497         // make sure we have the full name or a prefix
 498         if (name != null && name.matches(string, start, tokenLength)) {
 499             return name;
 500         }
 501         return null;
 502     }
 503 
 504     private int readTimeZoneOffset() {
 505         final int sign = string.charAt(pos - 1) == '+' ? 1 : -1;
 506         int offset = readNumber(2);
 507         skip(':');
 508         offset = offset * 60 + readNumber(2);
 509         return sign * offset;
 510     }
 511 
 512     private boolean skipParentheses() {
 513         int parenCount = 1;
 514         while (pos < length && parenCount != 0) {
 515             final char c = string.charAt(pos++);
 516             if (c == '(') {
 517                 parenCount++;
 518             } else if (c == ')') {
 519                 parenCount--;
 520             }
 521         }
 522         return true;
 523     }
 524 
 525     private static int getDefaultValue(final int field) {
 526         switch (field) {
 527             case MONTH:
 528             case DAY:
 529                 return 1;
 530             default:
 531                 return 0;
 532         }
 533     }
 534 
 535     private static boolean isDay(final int n) {
 536         return 1 <= n && n <= 31;
 537     }
 538 
 539     private static boolean isMonth(final int n) {
 540         return 1 <= n && n <= 12;
 541     }
 542 
 543     private static boolean isHour(final int n) {
 544         return 0 <= n && n <= 24;
 545     }
 546 
 547     private static boolean isMinuteOrSecond(final int n) {
 548         return 0 <= n && n < 60;
 549     }
 550 
 551     private static boolean isMillisecond(final int n) {
 552         return 0<= n && n < 1000;
 553     }
 554 
 555     private boolean setMonth(final int m) {
 556         if (!isSet(MONTH)) {
 557             namedMonth = true;
 558             set(MONTH, m);
 559             return true;
 560         }
 561         return false;
 562     }
 563 
 564     private boolean setDateField(final int n) {
 565         for (int field = YEAR; field != HOUR; field++) {
 566             if (!isSet(field)) {
 567                 // no validation on legacy date fields
 568                 set(field, n);
 569                 return true;
 570             }
 571         }
 572         return false;
 573     }
 574 
 575     private boolean setTimeField(final int n) {
 576         for (int field = HOUR; field != TIMEZONE; field++) {
 577             if (!isSet(field)) {
 578                 if (checkLegacyField(field, n)) {
 579                     set(field, n);
 580                     return true;
 581                 }
 582                 return false;
 583             }
 584         }
 585         return false;
 586     }
 587 
 588     private boolean setTimezone(final int offset, final boolean asNumericOffset) {
 589         if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) {
 590             set(TIMEZONE, offset);
 591             return true;
 592         }
 593         return false;
 594     }
 595 
 596     private boolean setAmPm(final int offset) {
 597         if (!isSet(HOUR)) {
 598             return false;
 599         }
 600         final int hour = get(HOUR);
 601         if (hour >= 0 && hour <= 12) {
 602             set(HOUR, hour + offset);
 603         }
 604         return true;
 605     }
 606 
 607     private boolean patchResult(final boolean strict) {
 608         // sanity checks - make sure we have something
 609         if (!isSet(YEAR) && !isSet(HOUR)) {
 610             return false;
 611         }
 612         if (isSet(HOUR) && !isSet(MINUTE)) {
 613             return false;
 614         }
 615         // fill in default values for unset fields except timezone
 616         for (int field = YEAR; field <= TIMEZONE; field++) {
 617             if (get(field) == null) {
 618                 if (field == TIMEZONE && !strict) {
 619                     // We only use UTC as default timezone for dates parsed complying with
 620                     // the format specified in ES5 15.9.1.15. Otherwise the slot is left empty
 621                     // and local timezone is used.
 622                     continue;
 623                 }
 624                 final int value = getDefaultValue(field);
 625                 set(field, value);
 626             }
 627         }
 628 
 629         if (!strict) {
 630             // swap year, month, and day if it looks like the right thing to do
 631             if (isDay(get(YEAR))) {
 632                 final int d = get(YEAR);
 633                 set(YEAR, get(DAY));
 634                 if (namedMonth) {
 635                     // d-m-y
 636                     set(DAY, d);
 637                 } else {
 638                     // m-d-y
 639                     final int d2 = get(MONTH);
 640                     set(MONTH, d);
 641                     set(DAY, d2);
 642                 }
 643             }
 644             // sanity checks now that we know what's what
 645             if (!isMonth(get(MONTH)) || !isDay(get(DAY))) {
 646                 return false;
 647             }
 648 
 649             // add 1900 or 2000 to year if it's between 0 and 100
 650             final int year = get(YEAR);
 651             if (year >= 0 && year < 100) {
 652                 set(YEAR, year >= 50 ? 1900 + year : 2000 + year);
 653             }
 654         } else {
 655             // 24 hour value is only allowed if all other time values are zero
 656             if (get(HOUR) == 24 &&
 657                     (get(MINUTE) != 0 || get(SECOND) != 0 || get(MILLISECOND) != 0)) {
 658                 return false;
 659             }
 660         }
 661 
 662         // set month to 0-based
 663         set(MONTH, get(MONTH) - 1);
 664         return true;
 665     }
 666 
 667     private static void addName(final String str, final int type, final int value) {
 668         final Name name = new Name(str, type, value);
 669         names.put(name.key, name);
 670     }
 671 
 672     private static class Name {
 673         final String name;
 674         final String key;
 675         final int value;
 676         final int type;
 677 
 678         final static int DAY_OF_WEEK    = -1;
 679         final static int MONTH_NAME     = 0;
 680         final static int AM_PM          = 1;
 681         final static int TIMEZONE_ID    = 2;
 682         final static int TIME_SEPARATOR = 3;
 683 
 684         Name(final String name, final int type, final int value) {
 685             assert name != null;
 686             assert name.equals(name.toLowerCase());
 687 
 688             this.name = name;
 689             // use first three characters as lookup key
 690             this.key = name.substring(0, Math.min(3, name.length()));
 691             this.type = type;
 692             this.value = value;
 693         }
 694 
 695         public boolean matches(final String str, final int offset, final int len) {
 696             return name.regionMatches(true, 0, str, offset, len);
 697         }
 698 
 699         @Override
 700         public String toString() {
 701             return name;
 702         }
 703     }
 704 
 705 }
--- EOF ---