1 /* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package jdk.nashorn.internal.parser; 27 28 import static java.lang.Character.DECIMAL_DIGIT_NUMBER; 29 import static java.lang.Character.LOWERCASE_LETTER; 30 import static java.lang.Character.OTHER_PUNCTUATION; 31 import static java.lang.Character.SPACE_SEPARATOR; 32 import static java.lang.Character.UPPERCASE_LETTER; 33 34 import java.util.HashMap; 35 import java.util.Locale; 36 37 /** 38 * JavaScript date parser. This class first tries to parse a date string 39 * according to the extended ISO 8601 format specified in ES5 15.9.1.15. 40 * If that fails, it falls back to legacy mode in which it accepts a range 41 * of different formats. 42 * 43 * <p>This class is neither thread-safe nor reusable. 
/**
 * Parser for JavaScript date strings.
 *
 * <p>{@link #parse()} first tries the extended ISO 8601 format specified in
 * ES5 15.9.1.15 and, if that fails, falls back to a lenient legacy mode that
 * accepts a range of different formats.</p>
 *
 * <p>An instance keeps its scanning position and the parsed fields as mutable
 * state, so it is neither thread-safe nor reusable. Calling
 * <code>parse()</code> more than once on the same instance yields undefined
 * results; create a new instance for every string.</p>
 */
public class DateParser {

    /** Constant for index position of parsed year value. */
    public static final int YEAR = 0;
    /** Constant for index position of parsed month value. */
    public static final int MONTH = 1;
    /** Constant for index position of parsed day value. */
    public static final int DAY = 2;
    /** Constant for index position of parsed hour value. */
    public static final int HOUR = 3;
    /** Constant for index position of parsed minute value. */
    public static final int MINUTE = 4;
    /** Constant for index position of parsed second value. */
    public static final int SECOND = 5;
    /** Constant for index position of parsed millisecond value. */
    public static final int MILLISECOND = 6;
    /** Constant for index position of parsed time zone offset value (in minutes). */
    public static final int TIMEZONE = 7;

    /** Lexical categories produced by {@link #next()}. */
    private enum Token {
        UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END
    }

    /** The string being parsed. */
    private final String string;
    /** Cached length of {@link #string}. */
    private final int length;
    /** Parsed values indexed by the field constants; <code>null</code> means "not set". */
    private final Integer[] fields;
    /** Index of the next character to be read. */
    private int pos = 0;
    /** The current token, or <code>null</code> if no token has been read yet. */
    private Token token;
    /** Number of characters consumed by the current token. */
    private int tokenLength;
    /** Value of the current NAME token; <code>null</code> if the name is not recognized. */
    private Name nameValue;
    /** Value of the current NUMBER token, or -1/+1 for a SIGN token. */
    private int numValue;
    /** Next field to be filled in ECMA mode. */
    private int currentField = YEAR;
    /** Sign of an extended ECMA year (-1 or 1), or 0 if no sign was seen. */
    private int yearSign = 0;
    /** True if the month was given by name, which affects day/month/year ordering. */
    private boolean namedMonth = false;

    /** Recognized names, keyed by their first (up to) three lower-case characters. */
    private static final HashMap<String, Name> names = new HashMap<>();

    static {
        // weekday names are recognized but carry no value
        addName("monday", Name.DAY_OF_WEEK, 0);
        addName("tuesday", Name.DAY_OF_WEEK, 0);
        addName("wednesday", Name.DAY_OF_WEEK, 0);
        addName("thursday", Name.DAY_OF_WEEK, 0);
        addName("friday", Name.DAY_OF_WEEK, 0);
        addName("saturday", Name.DAY_OF_WEEK, 0);
        addName("sunday", Name.DAY_OF_WEEK, 0);
        // month names map to 1-based month numbers
        addName("january", Name.MONTH_NAME, 1);
        addName("february", Name.MONTH_NAME, 2);
        addName("march", Name.MONTH_NAME, 3);
        addName("april", Name.MONTH_NAME, 4);
        addName("may", Name.MONTH_NAME, 5);
        addName("june", Name.MONTH_NAME, 6);
        addName("july", Name.MONTH_NAME, 7);
        addName("august", Name.MONTH_NAME, 8);
        addName("september", Name.MONTH_NAME, 9);
        addName("october", Name.MONTH_NAME, 10);
        addName("november", Name.MONTH_NAME, 11);
        addName("december", Name.MONTH_NAME, 12);
        // AM/PM markers carry the number of hours to add
        addName("am", Name.AM_PM, 0);
        addName("pm", Name.AM_PM, 12);
        // time zone names carry their offset in minutes
        addName("z", Name.TIMEZONE_ID, 0);
        addName("gmt", Name.TIMEZONE_ID, 0);
        addName("ut", Name.TIMEZONE_ID, 0);
        addName("utc", Name.TIMEZONE_ID, 0);
        addName("est", Name.TIMEZONE_ID, -5 * 60);
        addName("edt", Name.TIMEZONE_ID, -4 * 60);
        addName("cst", Name.TIMEZONE_ID, -6 * 60);
        addName("cdt", Name.TIMEZONE_ID, -5 * 60);
        addName("mst", Name.TIMEZONE_ID, -7 * 60);
        addName("mdt", Name.TIMEZONE_ID, -6 * 60);
        addName("pst", Name.TIMEZONE_ID, -8 * 60);
        addName("pdt", Name.TIMEZONE_ID, -7 * 60);
        // ISO 8601 separator between date and time
        addName("t", Name.TIME_SEPARATOR, 0);
    }

    /**
     * Construct a new <code>DateParser</code> instance for parsing the given string.
     * @param string the string to be parsed
     */
    public DateParser(final String string) {
        this.string = string;
        this.length = string.length();
        this.fields = new Integer[TIMEZONE + 1];
    }

    /**
     * Try parsing the given string as date according to the extended ISO 8601 format
     * specified in ES5 15.9.1.15. Fall back to legacy mode if that fails.
     * This method returns <code>true</code> if the string could be parsed.
     * @return true if the string could be parsed as date
     */
    public boolean parse() {
        return parseEcmaDate() || parseLegacyDate();
    }

    /**
     * Try parsing the date string according to the rules laid out in ES5 15.9.1.15.
     * The date string must conform to the following format:
     *
     * <pre>  [('-'|'+')yy]yyyy[-MM[-dd]][Thh:mm[:ss[.sss]][Z|(+|-)hh:mm]] </pre>
     *
     * <p>If the string does not contain a time zone offset, the <code>TIMEZONE</code> field
     * is set to <code>0</code> (GMT).</p>
     *
     * <p>When this method returns <code>false</code> the current token and the fields set
     * so far are kept, so that {@link #parseLegacyDate()} can continue from that point.</p>
     *
     * @return true if string represents a valid ES5 date string.
     */
    public boolean parseEcmaDate() {

        if (token == null) {
            token = next();
        }

        while (token != Token.END) {

            switch (token) {
                case NUMBER:
                    if (currentField == YEAR && yearSign != 0) {
                        // 15.9.1.15.1 Extended year must have six digits
                        if (tokenLength != 6) {
                            return false;
                        }
                        numValue *= yearSign;
                    } else if (!checkEcmaField(currentField, numValue)) {
                        return false;
                    }
                    if (!skipEcmaDelimiter()) {
                        return false;
                    }
                    // the field is stored only after the delimiter check succeeded, so that
                    // legacy mode can pick up the very same number if we bail out above
                    if (currentField < TIMEZONE) {
                        set(currentField++, numValue);
                    }
                    break;

                case NAME:
                    if (nameValue == null) {
                        return false;
                    }
                    switch (nameValue.type) {
                        case Name.TIME_SEPARATOR:
                            // 'T' is only valid after at least a year and before any time field
                            if (currentField == YEAR || currentField > HOUR) {
                                return false;
                            }
                            currentField = HOUR;
                            break;
                        case Name.TIMEZONE_ID:
                            // 'Z' is the only time zone name allowed in ECMA mode
                            if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) {
                                return false;
                            }
                            break;
                        default:
                            return false;
                    }
                    break;

                case SIGN:
                    if (peek() == -1) {
                        // END after sign - wrong!
                        return false;
                    }

                    if (currentField == YEAR) {
                        yearSign = numValue;
                    } else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) {
                        // Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds
                        return false;
                    }
                    break;

                default:
                    return false;
            }
            token = next();
        }

        return patchResult(true);
    }

    /**
     * Try parsing the date using a fuzzy algorithm that can handle a variety of formats.
     *
     * <p>Numbers separated by <code>':'</code> are treated as time values, optionally followed by a
     * millisecond value separated by <code>'.'</code>. Other number values are treated as date values.
     * The exact sequence of day, month, and year values to apply is determined heuristically.</p>
     *
     * <p>English month names and selected time zone names as well as AM/PM markers are recognized
     * and handled properly. Additionally, numeric time zone offsets such as <code>(+|-)hh:mm</code> or
     * <code>(+|-)hhmm</code> are recognized. If the string does not contain a time zone offset
     * the <code>TIMEZONE</code> field is left undefined, meaning the local time zone should be applied.</p>
     *
     * <p>English weekday names are recognized but ignored. All text in parentheses is ignored as well.
     * All other text causes parsing to fail.</p>
     *
     * @return true if the string could be parsed
     */
    public boolean parseLegacyDate() {

        if (yearSign != 0 || currentField > DAY) {
            // we don't support signed years in legacy mode
            return false;
        }
        if (token == null) {
            token = next();
        }

        while (token != Token.END) {

            switch (token) {
                case NUMBER:
                    if (skipDelimiter(':')) {
                        // A number followed by ':' is parsed as time
                        if (!setTimeField(numValue)) {
                            return false;
                        }
                        // consume remaining time tokens
                        do {
                            token = next();
                            if (token != Token.NUMBER || !setTimeField(numValue)) {
                                return false;
                            }
                            // once seconds are set only a '.'-separated millisecond value may follow
                        } while (skipDelimiter(isSet(SECOND) ? '.' : ':'));

                    } else {
                        // Parse as date token
                        if (!setDateField(numValue)) {
                            return false;
                        }
                        skipDelimiter('-');
                    }
                    break;

                case NAME:
                    if (nameValue == null) {
                        return false;
                    }
                    switch (nameValue.type) {
                        case Name.AM_PM:
                            if (!setAmPm(nameValue.value)) {
                                return false;
                            }
                            break;
                        case Name.MONTH_NAME:
                            if (!setMonth(nameValue.value)) {
                                return false;
                            }
                            break;
                        case Name.TIMEZONE_ID:
                            if (!setTimezone(nameValue.value, false)) {
                                return false;
                            }
                            break;
                        case Name.TIME_SEPARATOR:
                            return false;
                        default:
                            // weekday names are accepted and ignored
                            break;
                    }
                    // a '-' after a time zone name is kept as it may start a numeric offset
                    if (nameValue.type != Name.TIMEZONE_ID) {
                        skipDelimiter('-');
                    }
                    break;

                case SIGN:
                    if (peek() == -1) {
                        // END after sign - wrong!
                        return false;
                    }

                    if (!setTimezone(readTimeZoneOffset(), true)) {
                        return false;
                    }
                    break;

                case PARENTHESIS:
                    if (!skipParentheses()) {
                        return false;
                    }
                    break;

                case SEPARATOR:
                    break;

                default:
                    return false;
            }
            token = next();
        }

        return patchResult(false);
    }

    /**
     * Get the parsed date and time fields as an array of <code>Integers</code>.
     *
     * <p>If parsing was successful, all fields are guaranteed to be set except for the
     * <code>TIMEZONE</code> field which may be <code>null</code>, meaning that local time zone
     * offset should be applied. The returned array is the parser's internal array, not a copy.</p>
     *
     * @return the parsed date fields
     */
    public Integer[] getDateFields() {
        return fields;
    }

    /** Returns true if the given field has been assigned a value. */
    private boolean isSet(final int field) {
        return fields[field] != null;
    }

    /** Returns the value of the given field, or <code>null</code> if it is not set. */
    private Integer get(final int field) {
        return fields[field];
    }

    /** Assigns a value to the given field. */
    private void set(final int field, final int value) {
        fields[field] = value;
    }

    /** Returns the next unread character without consuming it, or -1 at the end of input. */
    private int peek() {
        return pos < length ? string.charAt(pos) : -1;
    }

    // Skip delimiter if followed by a number. Used for ISO 8601 formatted dates
    private boolean skipNumberDelimiter(final char c) {
        if (pos < length - 1 && string.charAt(pos) == c
                && Character.getType(string.charAt(pos + 1)) == DECIMAL_DIGIT_NUMBER) {
            token = null;
            pos++;
            return true;
        }
        return false;
    }

    // Skip delimiter if it is the next character, regardless of what follows it
    private boolean skipDelimiter(final char c) {
        if (pos < length && string.charAt(pos) == c) {
            token = null;
            pos++;
            return true;
        }
        return false;
    }

    /**
     * Reads the next token starting at the current position and advances past it.
     * As a side effect <code>tokenLength</code> is updated, and <code>numValue</code>
     * (for NUMBER and SIGN tokens) or <code>nameValue</code> (for NAME tokens) is set.
     *
     * @return the type of the token read, or <code>Token.END</code> at the end of input
     */
    private Token next() {
        if (pos >= length) {
            tokenLength = 0;
            return Token.END;
        }

        final char c = string.charAt(pos);

        if (c > 0x80) {
            tokenLength = 1;
            pos++;
            return Token.UNKNOWN; // We only deal with ASCII here
        }

        final int type = Character.getType(c);
        switch (type) {
            case DECIMAL_DIGIT_NUMBER:
                // at most six digits are consumed per number token
                numValue = readNumber(6);
                return Token.NUMBER;
            case SPACE_SEPARATOR:
            case OTHER_PUNCTUATION:
                tokenLength = 1;
                pos++;
                return Token.SEPARATOR;
            case UPPERCASE_LETTER:
            case LOWERCASE_LETTER:
                nameValue = readName();
                return Token.NAME;
            default:
                tokenLength = 1;
                pos++;
                switch (c) {
                    case '(':
                        return Token.PARENTHESIS;
                    case '-':
                    case '+':
                        numValue = c == '-' ? -1 : 1;
                        return Token.SIGN;
                    default:
                        return Token.UNKNOWN;
                }
        }
    }

    /** Validates a time field value in legacy mode; date fields are not validated here. */
    private static boolean checkLegacyField(final int field, final int value) {
        switch (field) {
            case HOUR:
                return isHour(value);
            case MINUTE:
            case SECOND:
                return isMinuteOrSecond(value);
            case MILLISECOND:
                return isMillisecond(value);
            default:
                // skip validation on other legacy fields as we don't know what's what
                return true;
        }
    }

    /** Validates both the digit count of the current token and the value for an ECMA mode field. */
    private boolean checkEcmaField(final int field, final int value) {
        switch (field) {
            case YEAR:
                return tokenLength == 4;
            case MONTH:
                return tokenLength == 2 && isMonth(value);
            case DAY:
                return tokenLength == 2 && isDay(value);
            case HOUR:
                return tokenLength == 2 && isHour(value);
            case MINUTE:
            case SECOND:
                return tokenLength == 2 && isMinuteOrSecond(value);
            case MILLISECOND:
                // we allow millisecond to be less than 3 digits
                return tokenLength < 4 && isMillisecond(value);
            default:
                return true;
        }
    }

    /**
     * Checks that the number just read for <code>currentField</code> is followed by
     * something the ECMA format allows at that point, consuming the delimiter if it
     * introduces another number.
     */
    private boolean skipEcmaDelimiter() {
        switch (currentField) {
            case YEAR:
            case MONTH:
                return skipNumberDelimiter('-') || peek() == 'T' || peek() == -1;
            case DAY:
                return peek() == 'T' || peek() == -1;
            case HOUR:
            case MINUTE:
                return skipNumberDelimiter(':') || endOfTime();
            case SECOND:
                return skipNumberDelimiter('.') || endOfTime();
            default:
                return true;
        }
    }

    /** Returns true if the next character may legally follow the time part in ECMA mode. */
    private boolean endOfTime() {
        final int c = peek();
        return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' ';
    }

    private static boolean isAsciiLetter(final char ch) {
        return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
    }

    private static boolean isAsciiDigit(final char ch) {
        return '0' <= ch && ch <= '9';
    }

    /**
     * Reads up to <code>maxDigits</code> ASCII digits at the current position and
     * records the number of digits read in <code>tokenLength</code>.
     *
     * @param maxDigits the maximum number of digits to consume
     * @return the decimal value of the digits read, 0 if there were none
     */
    private int readNumber(final int maxDigits) {
        final int start = pos;
        int n = 0;
        final int max = Math.min(length, pos + maxDigits);
        while (pos < max && isAsciiDigit(string.charAt(pos))) {
            n = n * 10 + string.charAt(pos++) - '0';
        }
        tokenLength = pos - start;
        return n;
    }

    /**
     * Reads a run of ASCII letters and looks it up in the table of known names.
     * The first (up to) three letters form the lookup key; the whole run must then
     * be a case-insensitive prefix of the full name, so that for instance both
     * "Sep" and "Sept" are accepted for "september".
     *
     * @return the matching name, or <code>null</code> if the letters are not recognized
     */
    private Name readName() {
        final int start = pos;
        final int limit = Math.min(pos + 3, length);

        // first read up to the key length
        while (pos < limit && isAsciiLetter(string.charAt(pos))) {
            pos++;
        }
        final String key = string.substring(start, pos).toLowerCase(Locale.ENGLISH);
        final Name name = names.get(key);
        // then advance to end of name
        while (pos < length && isAsciiLetter(string.charAt(pos))) {
            pos++;
        }

        tokenLength = pos - start;
        // make sure we have the full name or a prefix
        if (name != null && name.matches(string, start, tokenLength)) {
            return name;
        }
        return null;
    }

    /**
     * Reads a numeric time zone offset of the form <code>hh[:]mm</code> following a sign
     * character that has just been consumed. Missing digits count as zero.
     *
     * @return the signed offset in minutes
     */
    private int readTimeZoneOffset() {
        final int sign = string.charAt(pos - 1) == '+' ? 1 : -1;
        int offset = readNumber(2);
        skipDelimiter(':');
        offset = offset * 60 + readNumber(2);
        return sign * offset;
    }

    /**
     * Skips text up to and including the parenthesis closing the one just consumed,
     * taking nesting into account. An unterminated parenthesis swallows the rest of
     * the input; this method currently always reports success.
     */
    private boolean skipParentheses() {
        int parenCount = 1;
        while (pos < length && parenCount != 0) {
            final char c = string.charAt(pos++);
            if (c == '(') {
                parenCount++;
            } else if (c == ')') {
                parenCount--;
            }
        }
        return true;
    }

    /** Returns the value used for a field that was not present in the input. */
    private static int getDefaultValue(final int field) {
        switch (field) {
            case MONTH:
            case DAY:
                return 1;
            default:
                return 0;
        }
    }

    private static boolean isDay(final int n) {
        return 1 <= n && n <= 31;
    }

    private static boolean isMonth(final int n) {
        return 1 <= n && n <= 12;
    }

    // 24 is accepted here; ECMA mode further restricts it in patchResult()
    private static boolean isHour(final int n) {
        return 0 <= n && n <= 24;
    }

    private static boolean isMinuteOrSecond(final int n) {
        return 0 <= n && n < 60;
    }

    private static boolean isMillisecond(final int n) {
        return 0 <= n && n < 1000;
    }

    /** Sets the month from a month name; fails if a month value has already been set. */
    private boolean setMonth(final int m) {
        if (!isSet(MONTH)) {
            namedMonth = true;
            set(MONTH, m);
            return true;
        }
        return false;
    }

    /** Stores a legacy date number in the first unset slot of YEAR, MONTH, DAY; fails if all are taken. */
    private boolean setDateField(final int n) {
        for (int field = YEAR; field != HOUR; field++) {
            if (!isSet(field)) {
                // no validation on legacy date fields
                set(field, n);
                return true;
            }
        }
        return false;
    }

    /**
     * Stores a legacy time number in the first unset slot of HOUR, MINUTE, SECOND,
     * MILLISECOND; fails if the value is out of range for that slot or all are taken.
     */
    private boolean setTimeField(final int n) {
        for (int field = HOUR; field != TIMEZONE; field++) {
            if (!isSet(field)) {
                if (checkLegacyField(field, n)) {
                    set(field, n);
                    return true;
                }
                return false;
            }
        }
        return false;
    }

    /**
     * Sets the time zone offset. A numeric offset may replace a previously set offset
     * of zero, which allows forms such as "GMT+0200"; any other repeated time zone fails.
     *
     * @param offset the offset in minutes
     * @param asNumericOffset true if the offset was given numerically rather than by name
     * @return true if the offset was stored
     */
    private boolean setTimezone(final int offset, final boolean asNumericOffset) {
        if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) {
            set(TIMEZONE, offset);
            return true;
        }
        return false;
    }

    /**
     * Applies an AM/PM marker to the hour parsed so far. Hours outside of 0..12 are
     * left untouched.
     *
     * @param offset 0 for AM, 12 for PM
     * @return false if no hour has been parsed yet, true otherwise
     */
    private boolean setAmPm(final int offset) {
        if (!isSet(HOUR)) {
            return false;
        }
        final int hour = get(HOUR);
        if (hour >= 0 && hour <= 12) {
            // On a 12-hour clock "12" denotes the first hour of its half-day, so it has to
            // wrap to 0 before the offset is added: 12 AM is 0:00 and 12 PM is 12:00.
            // Adding the offset to 12 directly would turn 12 PM into hour 24.
            set(HOUR, (hour % 12) + offset);
        }
        return true;
    }

    /**
     * Completes and validates the parsed fields once the end of input has been reached:
     * unset fields receive default values, legacy results get their day/month/year order
     * and two-digit years fixed, and the month is converted to a 0-based value.
     *
     * @param strict true for ECMA mode, false for legacy mode
     * @return true if the fields form an acceptable date
     */
    private boolean patchResult(final boolean strict) {
        // sanity checks - make sure we have something
        if (!isSet(YEAR) && !isSet(HOUR)) {
            return false;
        }
        if (isSet(HOUR) && !isSet(MINUTE)) {
            return false;
        }
        // fill in default values for unset fields except timezone
        for (int field = YEAR; field <= TIMEZONE; field++) {
            if (get(field) == null) {
                if (field == TIMEZONE && !strict) {
                    // We only use UTC as default timezone for dates parsed complying with
                    // the format specified in ES5 15.9.1.15. Otherwise the slot is left empty
                    // and local timezone is used.
                    continue;
                }
                final int value = getDefaultValue(field);
                set(field, value);
            }
        }

        if (!strict) {
            // swap year, month, and day if it looks like the right thing to do
            if (isDay(get(YEAR))) {
                final int d = get(YEAR);
                set(YEAR, get(DAY));
                if (namedMonth) {
                    // d-m-y
                    set(DAY, d);
                } else {
                    // m-d-y
                    final int d2 = get(MONTH);
                    set(MONTH, d);
                    set(DAY, d2);
                }
            }
            // sanity checks now that we know what's what
            if (!isMonth(get(MONTH)) || !isDay(get(DAY))) {
                return false;
            }

            // add 1900 or 2000 to year if it's between 0 and 100
            final int year = get(YEAR);
            if (year >= 0 && year < 100) {
                set(YEAR, year >= 50 ? 1900 + year : 2000 + year);
            }
        } else {
            // 24 hour value is only allowed if all other time values are zero
            if (get(HOUR) == 24 &&
                    (get(MINUTE) != 0 || get(SECOND) != 0 || get(MILLISECOND) != 0)) {
                return false;
            }
        }

        // set month to 0-based
        set(MONTH, get(MONTH) - 1);
        return true;
    }

    /** Registers a name in the lookup table under its (up to) three character key. */
    private static void addName(final String str, final int type, final int value) {
        final Name name = new Name(str, type, value);
        names.put(name.key, name);
    }

    /** A recognized word together with its category and associated numeric value. */
    private static class Name {
        /** The full lower-case name. */
        final String name;
        /** The first (up to) three characters of the name, used as lookup key. */
        final String key;
        /** The value associated with the name; its meaning depends on the type. */
        final int value;
        /** One of the type constants below. */
        final int type;

        static final int DAY_OF_WEEK    = -1;
        static final int MONTH_NAME     = 0;
        static final int AM_PM          = 1;
        static final int TIMEZONE_ID    = 2;
        static final int TIME_SEPARATOR = 3;

        Name(final String name, final int type, final int value) {
            assert name != null;
            assert name.equals(name.toLowerCase(Locale.ENGLISH));

            this.name = name;
            // use first three characters as lookup key
            this.key = name.substring(0, Math.min(3, name.length()));
            this.type = type;
            this.value = value;
        }

        /**
         * Returns true if the given region of <code>str</code> is a case-insensitive
         * prefix of (or equal to) this name.
         */
        public boolean matches(final String str, final int offset, final int len) {
            return name.regionMatches(true, 0, str, offset, len);
        }

        @Override
        public String toString() {
            return name;
        }
    }

}