1 /* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package jdk.nashorn.internal.objects; 27 28 import static java.lang.Character.DECIMAL_DIGIT_NUMBER; 29 import static java.lang.Character.LOWERCASE_LETTER; 30 import static java.lang.Character.OTHER_PUNCTUATION; 31 import static java.lang.Character.SPACE_SEPARATOR; 32 import static java.lang.Character.UPPERCASE_LETTER; 33 34 import java.util.HashMap; 35 36 /** 37 * JavaScript date parser. This class first tries to parse a date string 38 * according to the extended ISO 8601 format specified in ES5 15.9.1.15. 39 * If that fails, it falls back to legacy mode in which it accepts a range 40 * of different formats. 41 * 42 * <p>This class is neither thread-safe nor reusable. Calling the 43 * <tt>parse()</tt> method more than once will yield undefined results.</p> 44 */ 45 public class DateParser { 46 47 /** Constant for index position of parsed year value. */ 48 public final static int YEAR = 0; 49 /** Constant for index position of parsed month value. */ 50 public final static int MONTH = 1; 51 /** Constant for index position of parsed day value. */ 52 public final static int DAY = 2; 53 /** Constant for index position of parsed hour value. */ 54 public final static int HOUR = 3; 55 /** Constant for index position of parsed minute value. */ 56 public final static int MINUTE = 4; 57 /** Constant for index position of parsed second value. */ 58 public final static int SECOND = 5; 59 /** Constant for index position of parsed millisecond value. */ 60 public final static int MILLISECOND = 6; 61 /** Constant for index position of parsed time zone offset value. */ 62 public final static int TIMEZONE = 7; 63 64 private enum Token { 65 UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END 66 } 67 68 private final String string; 69 private final int length; 70 private final Integer[] fields; 71 private int pos = 0; 72 private Token token; 73 private int tokenLength; 74 private Name nameValue; 75 private int numValue; 76 private int currentField = YEAR; 77 private int yearSign = 0; 78 private boolean namedMonth = false; 79 80 private final static HashMap<String,Name> names = new HashMap<>(); 81 82 static { 83 addName("monday", Name.DAY_OF_WEEK, 0); 84 addName("tuesday", Name.DAY_OF_WEEK, 0); 85 addName("wednesday", Name.DAY_OF_WEEK, 0); 86 addName("thursday", Name.DAY_OF_WEEK, 0); 87 addName("friday", Name.DAY_OF_WEEK, 0); 88 addName("saturday", Name.DAY_OF_WEEK, 0); 89 addName("sunday", Name.DAY_OF_WEEK, 0); 90 addName("january", Name.MONTH_NAME, 1); 91 addName("february", Name.MONTH_NAME, 2); 92 addName("march", Name.MONTH_NAME, 3); 93 addName("april", Name.MONTH_NAME, 4); 94 addName("may", Name.MONTH_NAME, 5); 95 addName("june", Name.MONTH_NAME, 6); 96 addName("july", Name.MONTH_NAME, 7); 97 addName("august", Name.MONTH_NAME, 8); 98 addName("september", Name.MONTH_NAME, 9); 99 addName("october", Name.MONTH_NAME, 10); 100 addName("november", Name.MONTH_NAME, 11); 101 addName("december", Name.MONTH_NAME, 12); 102 addName("am", Name.AM_PM, 0); 103 addName("pm", Name.AM_PM, 12); 104 addName("z", Name.TIMEZONE_ID, 0); 105 addName("gmt", Name.TIMEZONE_ID, 0); 106 addName("ut", Name.TIMEZONE_ID, 0); 107 addName("utc", Name.TIMEZONE_ID, 0); 108 addName("est", Name.TIMEZONE_ID, -5 * 60); 109 addName("edt", Name.TIMEZONE_ID, -4 * 60); 110 addName("cst", Name.TIMEZONE_ID, -6 * 60); 111 addName("cdt", Name.TIMEZONE_ID, -5 * 60); 112 addName("mst", Name.TIMEZONE_ID, -7 * 60); 113 addName("mdt", Name.TIMEZONE_ID, -6 * 60); 114 addName("pst", Name.TIMEZONE_ID, -8 * 60); 115 addName("pdt", Name.TIMEZONE_ID, -7 * 60); 116 addName("t", Name.TIME_SEPARATOR, 0); 117 } 118 119 /** 120 * Construct a new <tt>DateParser</tt> instance for parsing the given string. 121 * @param string the string to be parsed 122 */ 123 public DateParser(final String string) { 124 this.string = string; 125 this.length = string.length(); 126 this.fields = new Integer[TIMEZONE + 1]; 127 } 128 129 /** 130 * Try parsing the given string as date according to the extended ISO 8601 format 131 * specified in ES5 15.9.1.15. Fall back to legacy mode if that fails. 132 * This method returns <tt>true</tt> if the string could be parsed. 133 * @return true if the string could be parsed as date 134 */ 135 public boolean parse() { 136 return parseEcmaDate() || parseLegacyDate(); 137 } 138 139 /** 140 * Try parsing the date string according to the rules laid out in ES5 15.9.1.15. 141 * The date string must conform to the following format: 142 * 143 * <pre> [('-'|'+')yy]yyyy[-MM[-dd]][hh:mm[:ss[.sss]][Z|(+|-)hh:mm]] </pre> 144 * 145 * <p>If the string does not contain a time zone offset, the <tt>TIMEZONE</tt> field 146 * is set to <tt>0</tt> (GMT).</p> 147 * @return true if string represents a valid ES5 date string. 148 */ 149 public boolean parseEcmaDate() { 150 151 if (token == null) { 152 token = next(); 153 } 154 155 while (token != Token.END) { 156 157 switch (token) { 158 case NUMBER: 159 if (currentField == YEAR && yearSign != 0) { 160 // 15.9.1.15.1 Extended year must have six digits 161 if (tokenLength != 6) { 162 return false; 163 } 164 numValue *= yearSign; 165 } else if (!checkEcmaField(currentField, numValue)) { 166 return false; 167 } 168 if (!skipEcmaDelimiter()) { 169 return false; 170 } 171 if (currentField < TIMEZONE) { 172 set(currentField++, numValue); 173 } 174 break; 175 176 case NAME: 177 if (nameValue == null) { 178 return false; 179 } 180 switch (nameValue.type) { 181 case Name.TIME_SEPARATOR: 182 if (currentField == YEAR || currentField > HOUR) { 183 return false; 184 } 185 currentField = HOUR; 186 break; 187 case Name.TIMEZONE_ID: 188 if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) { 189 return false; 190 } 191 break; 192 default: 193 return false; 194 } 195 break; 196 197 case SIGN: 198 if (currentField == YEAR) { 199 yearSign = numValue; 200 } else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) { 201 // Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds 202 return false; 203 } 204 break; 205 206 default: 207 return false; 208 } 209 token = next(); 210 } 211 212 return patchResult(true); 213 } 214 215 /** 216 * Try parsing the date using a fuzzy algorithm that can handle a variety of formats. 217 * 218 * <p>Numbers separated by <tt>':'</tt> are treated as time values, optionally followed by a 219 * millisecond value separated by <tt>'.'</tt>. Other number values are treated as date values. 220 * The exact sequence of day, month, and year values to apply is determined heuristically.</p> 221 * 222 * <p>English month names and selected time zone names as well as AM/PM markers are recognized 223 * and handled properly. Additionally, numeric time zone offsets such as <tt>(+|-)hh:mm</tt> or 224 * <tt>(+|-)hhmm</tt> are recognized. If the string does not contain a time zone offset 225 * the <tt>TIMEZONE</tt>field is left undefined, meaning the local time zone should be applied.</p> 226 * 227 * <p>English weekday names are recognized but ignored. All text in parentheses is ignored as well. 228 * All other text causes parsing to fail.</p> 229 * 230 * @return true if the string could be parsed 231 */ 232 public boolean parseLegacyDate() { 233 234 if (yearSign != 0 || currentField > DAY) { 235 // we don't support signed years in legacy mode 236 return false; 237 } 238 if (token == null) { 239 token = next(); 240 } 241 242 while (token != Token.END) { 243 244 switch (token) { 245 case NUMBER: 246 if (skip(':')) { 247 // A number followed by ':' is parsed as time 248 if (!setTimeField(numValue)) { 249 return false; 250 } 251 // consume remaining time tokens 252 do { 253 token = next(); 254 if (token != Token.NUMBER || !setTimeField(numValue)) { 255 return false; 256 } 257 } while (skip(isSet(SECOND) ? '.' : ':')); 258 259 } else { 260 // Parse as date token 261 if (!setDateField(numValue)) { 262 return false; 263 } 264 skip('-'); 265 } 266 break; 267 268 case NAME: 269 if (nameValue == null) { 270 return false; 271 } 272 switch (nameValue.type) { 273 case Name.AM_PM: 274 if (!setAmPm(nameValue.value)) { 275 return false; 276 } 277 break; 278 case Name.MONTH_NAME: 279 if (!setMonth(nameValue.value)) { 280 return false; 281 } 282 break; 283 case Name.TIMEZONE_ID: 284 if (!setTimezone(nameValue.value, false)) { 285 return false; 286 } 287 break; 288 case Name.TIME_SEPARATOR: 289 return false; 290 default: 291 break; 292 } 293 if (nameValue.type != Name.TIMEZONE_ID) { 294 skip('-'); 295 } 296 break; 297 298 case SIGN: 299 if (!setTimezone(readTimeZoneOffset(), true)) { 300 return false; 301 } 302 break; 303 304 case PARENTHESIS: 305 if (!skipParentheses()) { 306 return false; 307 } 308 break; 309 310 case SEPARATOR: 311 break; 312 313 default: 314 return false; 315 } 316 token = next(); 317 } 318 319 return patchResult(false); 320 } 321 322 /** 323 * Get the parsed date and time fields as an array of <tt>Integers</tt>. 324 * 325 * <p>If parsing was successful, all fields are guaranteed to be set except for the 326 * <tt>TIMEZONE</tt> field which may be <tt>null</tt>, meaning that local time zone 327 * offset should be applied.</p> 328 * 329 * @return the parsed date fields 330 */ 331 public Integer[] getDateFields() { 332 return fields; 333 } 334 335 private boolean isSet(final int field) { 336 return fields[field] != null; 337 } 338 339 private Integer get(final int field) { 340 return fields[field]; 341 } 342 343 private void set(final int field, final int value) { 344 fields[field] = value; 345 } 346 347 private int peek() { 348 return pos < length ? string.charAt(pos) : -1; 349 } 350 351 private boolean skip(final char c) { 352 if (pos < length && string.charAt(pos) == c) { 353 token = null; 354 pos++; 355 return true; 356 } 357 return false; 358 } 359 360 private Token next() { 361 if (pos >= length) { 362 tokenLength = 0; 363 return Token.END; 364 } 365 366 final char c = string.charAt(pos); 367 368 if (c > 0x80) { 369 tokenLength = 1; 370 pos++; 371 return Token.UNKNOWN; // We only deal with ASCII here 372 } 373 374 final int type = Character.getType(c); 375 switch (type) { 376 case DECIMAL_DIGIT_NUMBER: 377 numValue = readNumber(6); 378 return Token.NUMBER; 379 case SPACE_SEPARATOR : 380 case OTHER_PUNCTUATION: 381 tokenLength = 1; 382 pos++; 383 return Token.SEPARATOR; 384 case UPPERCASE_LETTER: 385 case LOWERCASE_LETTER: 386 nameValue = readName(); 387 return Token.NAME; 388 default: 389 tokenLength = 1; 390 pos++; 391 switch (c) { 392 case '(': 393 return Token.PARENTHESIS; 394 case '-': 395 case '+': 396 numValue = c == '-' ? -1 : 1; 397 return Token.SIGN; 398 default: 399 return Token.UNKNOWN; 400 } 401 } 402 } 403 404 private static boolean checkLegacyField(final int field, final int value) { 405 switch (field) { 406 case HOUR: 407 return isHour(value); 408 case MINUTE: 409 case SECOND: 410 return isMinuteOrSecond(value); 411 case MILLISECOND: 412 return isMillisecond(value); 413 default: 414 // skip validation on other legacy fields as we don't know what's what 415 return true; 416 } 417 } 418 419 private boolean checkEcmaField(final int field, final int value) { 420 switch (field) { 421 case YEAR: 422 return tokenLength == 4; 423 case MONTH: 424 return tokenLength == 2 && isMonth(value); 425 case DAY: 426 return tokenLength == 2 && isDay(value); 427 case HOUR: 428 return tokenLength == 2 && isHour(value); 429 case MINUTE: 430 case SECOND: 431 return tokenLength == 2 && isMinuteOrSecond(value); 432 case MILLISECOND: 433 // we allow millisecond to be less than 3 digits 434 return tokenLength < 4 && isMillisecond(value); 435 default: 436 return true; 437 } 438 } 439 440 private boolean skipEcmaDelimiter() { 441 switch (currentField) { 442 case YEAR: 443 case MONTH: 444 return skip('-') || peek() == 'T' || peek() == -1; 445 case DAY: 446 return peek() == 'T' || peek() == -1; 447 case HOUR: 448 case MINUTE: 449 return skip(':') || endOfTime(); 450 case SECOND: 451 return skip('.') || endOfTime(); 452 default: 453 return true; 454 } 455 } 456 457 private boolean endOfTime() { 458 final int c = peek(); 459 return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' '; 460 } 461 462 private static boolean isAsciiLetter(final char ch) { 463 return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z'); 464 } 465 466 private static boolean isAsciiDigit(final char ch) { 467 return '0' <= ch && ch <= '9'; 468 } 469 470 private int readNumber(final int maxDigits) { 471 final int start = pos; 472 int n = 0; 473 final int max = Math.min(length, pos + maxDigits); 474 while (pos < max && isAsciiDigit(string.charAt(pos))) { 475 n = n * 10 + string.charAt(pos++) - '0'; 476 } 477 tokenLength = pos - start; 478 return n; 479 } 480 481 private Name readName() { 482 final int start = pos; 483 final int limit = Math.min(pos + 3, length); 484 485 // first read up to the key length 486 while (pos < limit && isAsciiLetter(string.charAt(pos))) { 487 pos++; 488 } 489 final String key = string.substring(start, pos).toLowerCase(); 490 final Name name = names.get(key); 491 // then advance to end of name 492 while (pos < length && isAsciiLetter(string.charAt(pos))) { 493 pos++; 494 } 495 496 tokenLength = pos - start; 497 // make sure we have the full name or a prefix 498 if (name != null && name.matches(string, start, tokenLength)) { 499 return name; 500 } 501 return null; 502 } 503 504 private int readTimeZoneOffset() { 505 final int sign = string.charAt(pos - 1) == '+' ? 1 : -1; 506 int offset = readNumber(2); 507 skip(':'); 508 offset = offset * 60 + readNumber(2); 509 return sign * offset; 510 } 511 512 private boolean skipParentheses() { 513 int parenCount = 1; 514 while (pos < length && parenCount != 0) { 515 final char c = string.charAt(pos++); 516 if (c == '(') { 517 parenCount++; 518 } else if (c == ')') { 519 parenCount--; 520 } 521 } 522 return true; 523 } 524 525 private static int getDefaultValue(final int field) { 526 switch (field) { 527 case MONTH: 528 case DAY: 529 return 1; 530 default: 531 return 0; 532 } 533 } 534 535 private static boolean isDay(final int n) { 536 return 1 <= n && n <= 31; 537 } 538 539 private static boolean isMonth(final int n) { 540 return 1 <= n && n <= 12; 541 } 542 543 private static boolean isHour(final int n) { 544 return 0 <= n && n <= 24; 545 } 546 547 private static boolean isMinuteOrSecond(final int n) { 548 return 0 <= n && n < 60; 549 } 550 551 private static boolean isMillisecond(final int n) { 552 return 0<= n && n < 1000; 553 } 554 555 private boolean setMonth(final int m) { 556 if (!isSet(MONTH)) { 557 namedMonth = true; 558 set(MONTH, m); 559 return true; 560 } 561 return false; 562 } 563 564 private boolean setDateField(final int n) { 565 for (int field = YEAR; field != HOUR; field++) { 566 if (!isSet(field)) { 567 // no validation on legacy date fields 568 set(field, n); 569 return true; 570 } 571 } 572 return false; 573 } 574 575 private boolean setTimeField(final int n) { 576 for (int field = HOUR; field != TIMEZONE; field++) { 577 if (!isSet(field)) { 578 if (checkLegacyField(field, n)) { 579 set(field, n); 580 return true; 581 } 582 return false; 583 } 584 } 585 return false; 586 } 587 588 private boolean setTimezone(final int offset, final boolean asNumericOffset) { 589 if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) { 590 set(TIMEZONE, offset); 591 return true; 592 } 593 return false; 594 } 595 596 private boolean setAmPm(final int offset) { 597 if (!isSet(HOUR)) { 598 return false; 599 } 600 final int hour = get(HOUR); 601 if (hour >= 0 && hour <= 12) { 602 set(HOUR, hour + offset); 603 } 604 return true; 605 } 606 607 private boolean patchResult(final boolean strict) { 608 // sanity checks - make sure we have something 609 if (!isSet(YEAR) && !isSet(HOUR)) { 610 return false; 611 } 612 if (isSet(HOUR) && !isSet(MINUTE)) { 613 return false; 614 } 615 // fill in default values for unset fields except timezone 616 for (int field = YEAR; field <= TIMEZONE; field++) { 617 if (get(field) == null) { 618 if (field == TIMEZONE && !strict) { 619 // We only use UTC as default timezone for dates parsed complying with 620 // the format specified in ES5 15.9.1.15. Otherwise the slot is left empty 621 // and local timezone is used. 622 continue; 623 } 624 final int value = getDefaultValue(field); 625 set(field, value); 626 } 627 } 628 629 if (!strict) { 630 // swap year, month, and day if it looks like the right thing to do 631 if (isDay(get(YEAR))) { 632 final int d = get(YEAR); 633 set(YEAR, get(DAY)); 634 if (namedMonth) { 635 // d-m-y 636 set(DAY, d); 637 } else { 638 // m-d-y 639 final int d2 = get(MONTH); 640 set(MONTH, d); 641 set(DAY, d2); 642 } 643 } 644 // sanity checks now that we know what's what 645 if (!isMonth(get(MONTH)) || !isDay(get(DAY))) { 646 return false; 647 } 648 649 // add 1900 or 2000 to year if it's between 0 and 100 650 final int year = get(YEAR); 651 if (year >= 0 && year < 100) { 652 set(YEAR, year >= 50 ? 1900 + year : 2000 + year); 653 } 654 } else { 655 // 24 hour value is only allowed if all other time values are zero 656 if (get(HOUR) == 24 && 657 (get(MINUTE) != 0 || get(SECOND) != 0 || get(MILLISECOND) != 0)) { 658 return false; 659 } 660 } 661 662 // set month to 0-based 663 set(MONTH, get(MONTH) - 1); 664 return true; 665 } 666 667 private static void addName(final String str, final int type, final int value) { 668 final Name name = new Name(str, type, value); 669 names.put(name.key, name); 670 } 671 672 private static class Name { 673 final String name; 674 final String key; 675 final int value; 676 final int type; 677 678 final static int DAY_OF_WEEK = -1; 679 final static int MONTH_NAME = 0; 680 final static int AM_PM = 1; 681 final static int TIMEZONE_ID = 2; 682 final static int TIME_SEPARATOR = 3; 683 684 Name(final String name, final int type, final int value) { 685 assert name != null; 686 assert name.equals(name.toLowerCase()); 687 688 this.name = name; 689 // use first three characters as lookup key 690 this.key = name.substring(0, Math.min(3, name.length())); 691 this.type = type; 692 this.value = value; 693 } 694 695 public boolean matches(final String str, final int offset, final int len) { 696 return name.regionMatches(true, 0, str, offset, len); 697 } 698 699 @Override 700 public String toString() { 701 return name; 702 } 703 } 704 705 }