1 /* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package jdk.nashorn.internal.objects; 27 28 import static java.lang.Character.DECIMAL_DIGIT_NUMBER; 29 import static java.lang.Character.LOWERCASE_LETTER; 30 import static java.lang.Character.OTHER_PUNCTUATION; 31 import static java.lang.Character.SPACE_SEPARATOR; 32 import static java.lang.Character.UPPERCASE_LETTER; 33 34 import java.util.HashMap; 35 import java.util.Locale; 36 37 /** 38 * JavaScript date parser. This class first tries to parse a date string 39 * according to the extended ISO 8601 format specified in ES5 15.9.1.15. 40 * If that fails, it falls back to legacy mode in which it accepts a range 41 * of different formats. 42 * 43 * <p>This class is neither thread-safe nor reusable. 
/**
 * JavaScript date parser. This class first tries to parse a date string
 * according to the extended ISO 8601 format specified in ES5 15.9.1.15.
 * If that fails, it falls back to legacy mode in which it accepts a range
 * of different formats.
 *
 * <p>Parsed values are stored in an {@code Integer} array indexed by the
 * {@link #YEAR} .. {@link #TIMEZONE} constants. After a successful parse the
 * month is zero-based and the time zone offset is expressed in minutes.</p>
 *
 * <p>This class is neither thread-safe nor reusable. Calling the
 * {@code parse()} method more than once will yield undefined results.</p>
 */
public class DateParser {

    /** Constant for index position of parsed year value. */
    public static final int YEAR = 0;
    /** Constant for index position of parsed month value. */
    public static final int MONTH = 1;
    /** Constant for index position of parsed day value. */
    public static final int DAY = 2;
    /** Constant for index position of parsed hour value. */
    public static final int HOUR = 3;
    /** Constant for index position of parsed minute value. */
    public static final int MINUTE = 4;
    /** Constant for index position of parsed second value. */
    public static final int SECOND = 5;
    /** Constant for index position of parsed millisecond value. */
    public static final int MILLISECOND = 6;
    /** Constant for index position of parsed time zone offset value. */
    public static final int TIMEZONE = 7;

    /** Kinds of lexical tokens produced by {@link #next()}. */
    private enum Token {
        UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END
    }

    /** The string being parsed and its cached length. */
    private final String string;
    private final int length;
    /** Parsed field values; an entry is {@code null} until the field has been set. */
    private final Integer[] fields;
    /** Index of the next unread character in {@link #string}. */
    private int pos = 0;
    /** Current token; reset to {@code null} whenever {@link #skip(char)} consumes a character. */
    private Token token;
    /** Number of characters consumed by the most recently read token. */
    private int tokenLength;
    /** Value of the last NAME token, or {@code null} if the name was not recognized. */
    private Name nameValue;
    /** Value of the last NUMBER token, or +1/-1 for a SIGN token. */
    private int numValue;
    /** Next field to be filled in by the strict (ES5) parser. */
    private int currentField = YEAR;
    /** Sign of an extended year (+1/-1), or 0 if no year sign has been seen. */
    private int yearSign = 0;
    /** Whether the month was given by name, which affects day/month/year disambiguation. */
    private boolean namedMonth = false;

    /** Recognized names, keyed by their lower-case prefix of up to three characters. */
    private static final HashMap<String,Name> names = new HashMap<>();

    static {
        addName("monday", Name.DAY_OF_WEEK, 0);
        addName("tuesday", Name.DAY_OF_WEEK, 0);
        addName("wednesday", Name.DAY_OF_WEEK, 0);
        addName("thursday", Name.DAY_OF_WEEK, 0);
        addName("friday", Name.DAY_OF_WEEK, 0);
        addName("saturday", Name.DAY_OF_WEEK, 0);
        addName("sunday", Name.DAY_OF_WEEK, 0);
        addName("january", Name.MONTH_NAME, 1);
        addName("february", Name.MONTH_NAME, 2);
        addName("march", Name.MONTH_NAME, 3);
        addName("april", Name.MONTH_NAME, 4);
        addName("may", Name.MONTH_NAME, 5);
        addName("june", Name.MONTH_NAME, 6);
        addName("july", Name.MONTH_NAME, 7);
        addName("august", Name.MONTH_NAME, 8);
        addName("september", Name.MONTH_NAME, 9);
        addName("october", Name.MONTH_NAME, 10);
        addName("november", Name.MONTH_NAME, 11);
        addName("december", Name.MONTH_NAME, 12);
        addName("am", Name.AM_PM, 0);
        addName("pm", Name.AM_PM, 12);
        // time zone values are offsets from GMT in minutes
        addName("z", Name.TIMEZONE_ID, 0);
        addName("gmt", Name.TIMEZONE_ID, 0);
        addName("ut", Name.TIMEZONE_ID, 0);
        addName("utc", Name.TIMEZONE_ID, 0);
        addName("est", Name.TIMEZONE_ID, -5 * 60);
        addName("edt", Name.TIMEZONE_ID, -4 * 60);
        addName("cst", Name.TIMEZONE_ID, -6 * 60);
        addName("cdt", Name.TIMEZONE_ID, -5 * 60);
        addName("mst", Name.TIMEZONE_ID, -7 * 60);
        addName("mdt", Name.TIMEZONE_ID, -6 * 60);
        addName("pst", Name.TIMEZONE_ID, -8 * 60);
        addName("pdt", Name.TIMEZONE_ID, -7 * 60);
        addName("t", Name.TIME_SEPARATOR, 0);
    }

    /**
     * Construct a new {@code DateParser} instance for parsing the given string.
     * @param string the string to be parsed
     */
    public DateParser(final String string) {
        this.string = string;
        this.length = string.length();
        this.fields = new Integer[TIMEZONE + 1];
    }

    /**
     * Try parsing the given string as date according to the extended ISO 8601 format
     * specified in ES5 15.9.1.15. Fall back to legacy mode if that fails.
     * This method returns {@code true} if the string could be parsed.
     * @return true if the string could be parsed as date
     */
    public boolean parse() {
        return parseEcmaDate() || parseLegacyDate();
    }

    /**
     * Try parsing the date string according to the rules laid out in ES5 15.9.1.15.
     * The date string must conform to the following format:
     *
     * <pre>  [('-'|'+')yy]yyyy[-MM[-dd]][Thh:mm[:ss[.sss]][Z|(+|-)hh:mm]] </pre>
     *
     * <p>If the string does not contain a time zone offset, the {@code TIMEZONE} field
     * is set to {@code 0} (GMT).</p>
     * @return true if string represents a valid ES5 date string.
     */
    public boolean parseEcmaDate() {

        if (token == null) {
            token = next();
        }

        while (token != Token.END) {

            switch (token) {
                case NUMBER:
                    if (currentField == YEAR && yearSign != 0) {
                        // 15.9.1.15.1 Extended year must have six digits
                        if (tokenLength != 6) {
                            return false;
                        }
                        numValue *= yearSign;
                    } else if (!checkEcmaField(currentField, numValue)) {
                        return false;
                    }
                    if (!skipEcmaDelimiter()) {
                        return false;
                    }
                    if (currentField < TIMEZONE) {
                        set(currentField++, numValue);
                    }
                    break;

                case NAME:
                    if (nameValue == null) {
                        return false;
                    }
                    switch (nameValue.type) {
                        case Name.TIME_SEPARATOR:
                            // 'T' must follow at least a year and must not appear once time parsing has begun
                            if (currentField == YEAR || currentField > HOUR) {
                                return false;
                            }
                            currentField = HOUR;
                            break;
                        case Name.TIMEZONE_ID:
                            // 'Z' is the only time zone name accepted in strict mode
                            if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) {
                                return false;
                            }
                            break;
                        default:
                            return false;
                    }
                    break;

                case SIGN:
                    if (currentField == YEAR) {
                        yearSign = numValue;
                    } else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) {
                        // Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds
                        return false;
                    }
                    break;

                default:
                    return false;
            }
            token = next();
        }

        return patchResult(true);
    }

    /**
     * Try parsing the date using a fuzzy algorithm that can handle a variety of formats.
     *
     * <p>Numbers separated by {@code ':'} are treated as time values, optionally followed by a
     * millisecond value separated by {@code '.'}. Other number values are treated as date values.
     * The exact sequence of day, month, and year values to apply is determined heuristically.</p>
     *
     * <p>English month names and selected time zone names as well as AM/PM markers are recognized
     * and handled properly. Additionally, numeric time zone offsets such as {@code (+|-)hh:mm} or
     * {@code (+|-)hhmm} are recognized. If the string does not contain a time zone offset
     * the {@code TIMEZONE} field is left undefined, meaning the local time zone should be applied.</p>
     *
     * <p>English weekday names are recognized but ignored. All text in parentheses is ignored as well.
     * All other text causes parsing to fail.</p>
     *
     * @return true if the string could be parsed
     */
    public boolean parseLegacyDate() {

        if (yearSign != 0 || currentField > DAY) {
            // we don't support signed years in legacy mode
            return false;
        }
        if (token == null) {
            token = next();
        }

        while (token != Token.END) {

            switch (token) {
                case NUMBER:
                    if (skip(':')) {
                        // A number followed by ':' is parsed as time
                        if (!setTimeField(numValue)) {
                            return false;
                        }
                        // consume remaining time tokens
                        do {
                            token = next();
                            if (token != Token.NUMBER || !setTimeField(numValue)) {
                                return false;
                            }
                        } while (skip(isSet(SECOND) ? '.' : ':'));

                    } else {
                        // Parse as date token
                        if (!setDateField(numValue)) {
                            return false;
                        }
                        skip('-');
                    }
                    break;

                case NAME:
                    if (nameValue == null) {
                        return false;
                    }
                    switch (nameValue.type) {
                        case Name.AM_PM:
                            if (!setAmPm(nameValue.value)) {
                                return false;
                            }
                            break;
                        case Name.MONTH_NAME:
                            if (!setMonth(nameValue.value)) {
                                return false;
                            }
                            break;
                        case Name.TIMEZONE_ID:
                            if (!setTimezone(nameValue.value, false)) {
                                return false;
                            }
                            break;
                        case Name.TIME_SEPARATOR:
                            return false;
                        default:
                            // weekday names are recognized but ignored
                            break;
                    }
                    if (nameValue.type != Name.TIMEZONE_ID) {
                        // a '-' after a time zone name is left in place so it is read as an offset sign
                        skip('-');
                    }
                    break;

                case SIGN:
                    if (!setTimezone(readTimeZoneOffset(), true)) {
                        return false;
                    }
                    break;

                case PARENTHESIS:
                    if (!skipParentheses()) {
                        return false;
                    }
                    break;

                case SEPARATOR:
                    break;

                default:
                    return false;
            }
            token = next();
        }

        return patchResult(false);
    }

    /**
     * Get the parsed date and time fields as an array of {@code Integers}.
     *
     * <p>If parsing was successful, all fields are guaranteed to be set except for the
     * {@code TIMEZONE} field which may be {@code null}, meaning that local time zone
     * offset should be applied.</p>
     *
     * <p>The returned array is the parser's internal array, not a copy.</p>
     *
     * @return the parsed date fields
     */
    public Integer[] getDateFields() {
        return fields;
    }

    /** Returns true if the given field has been assigned a value. */
    private boolean isSet(final int field) {
        return fields[field] != null;
    }

    /** Returns the value of the given field, or {@code null} if it is unset. */
    private Integer get(final int field) {
        return fields[field];
    }

    /** Assigns a value to the given field. */
    private void set(final int field, final int value) {
        fields[field] = value;
    }

    /** Returns the next unread character without consuming it, or -1 at end of input. */
    private int peek() {
        return pos < length ? string.charAt(pos) : -1;
    }

    /**
     * Consumes the next character if it equals {@code c}.
     * On success the current token is invalidated.
     * @return true if the character was consumed
     */
    private boolean skip(final char c) {
        if (pos < length && string.charAt(pos) == c) {
            token = null;
            pos++;
            return true;
        }
        return false;
    }

    /**
     * Reads the next token, updating {@code tokenLength} and, depending on the
     * token type, {@code numValue} or {@code nameValue}.
     * @return the type of the token read
     */
    private Token next() {
        if (pos >= length) {
            tokenLength = 0;
            return Token.END;
        }

        final char c = string.charAt(pos);

        if (c > 0x80) {
            tokenLength = 1;
            pos++;
            return Token.UNKNOWN; // We only deal with ASCII here
        }

        final int type = Character.getType(c);
        switch (type) {
            case DECIMAL_DIGIT_NUMBER:
                numValue = readNumber(6);
                return Token.NUMBER;
            case SPACE_SEPARATOR :
            case OTHER_PUNCTUATION:
                tokenLength = 1;
                pos++;
                return Token.SEPARATOR;
            case UPPERCASE_LETTER:
            case LOWERCASE_LETTER:
                nameValue = readName();
                return Token.NAME;
            default:
                tokenLength = 1;
                pos++;
                switch (c) {
                    case '(':
                        return Token.PARENTHESIS;
                    case '-':
                    case '+':
                        numValue = c == '-' ? -1 : 1;
                        return Token.SIGN;
                    default:
                        return Token.UNKNOWN;
                }
        }
    }

    /** Range-checks a time field value in legacy mode; date fields are not validated here. */
    private static boolean checkLegacyField(final int field, final int value) {
        switch (field) {
            case HOUR:
                return isHour(value);
            case MINUTE:
            case SECOND:
                return isMinuteOrSecond(value);
            case MILLISECOND:
                return isMillisecond(value);
            default:
                // skip validation on other legacy fields as we don't know what's what
                return true;
        }
    }

    /** Checks both the digit count of the current token and the value range for a strict-mode field. */
    private boolean checkEcmaField(final int field, final int value) {
        switch (field) {
            case YEAR:
                return tokenLength == 4;
            case MONTH:
                return tokenLength == 2 && isMonth(value);
            case DAY:
                return tokenLength == 2 && isDay(value);
            case HOUR:
                return tokenLength == 2 && isHour(value);
            case MINUTE:
            case SECOND:
                return tokenLength == 2 && isMinuteOrSecond(value);
            case MILLISECOND:
                // we allow millisecond to be less than 3 digits
                return tokenLength < 4 && isMillisecond(value);
            default:
                return true;
        }
    }

    /**
     * Verifies that the current strict-mode field is followed by a permitted
     * delimiter, consuming {@code '-'}, {@code ':'} or {@code '.'} where applicable.
     * @return true if what follows the current field is acceptable
     */
    private boolean skipEcmaDelimiter() {
        switch (currentField) {
            case YEAR:
            case MONTH:
                return skip('-') || peek() == 'T' || peek() == -1;
            case DAY:
                return peek() == 'T' || peek() == -1;
            case HOUR:
            case MINUTE:
                return skip(':') || endOfTime();
            case SECOND:
                return skip('.') || endOfTime();
            default:
                return true;
        }
    }

    /** Returns true if the next character may legally follow a time value in strict mode. */
    private boolean endOfTime() {
        final int c = peek();
        return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' ';
    }

    private static boolean isAsciiLetter(final char ch) {
        return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
    }

    private static boolean isAsciiDigit(final char ch) {
        return '0' <= ch && ch <= '9';
    }

    /**
     * Reads up to {@code maxDigits} ASCII digits starting at the current position
     * and records the number of digits read in {@code tokenLength}.
     * @return the decimal value of the digits read, or 0 if there were none
     */
    private int readNumber(final int maxDigits) {
        final int start = pos;
        int n = 0;
        final int max = Math.min(length, pos + maxDigits);
        while (pos < max && isAsciiDigit(string.charAt(pos))) {
            n = n * 10 + string.charAt(pos++) - '0';
        }
        tokenLength = pos - start;
        return n;
    }

    /**
     * Reads a run of ASCII letters and looks it up in the name table.
     * @return the matching name, or {@code null} if the letters are neither a known
     *         name nor a prefix (of at least the lookup key length) of one
     */
    private Name readName() {
        final int start = pos;
        final int limit = Math.min(pos + 3, length);

        // first read up to the key length
        while (pos < limit && isAsciiLetter(string.charAt(pos))) {
            pos++;
        }
        final String key = string.substring(start, pos).toLowerCase(Locale.ENGLISH);
        final Name name = names.get(key);
        // then advance to end of name
        while (pos < length && isAsciiLetter(string.charAt(pos))) {
            pos++;
        }

        tokenLength = pos - start;
        // make sure we have the full name or a prefix
        if (name != null && name.matches(string, start, tokenLength)) {
            return name;
        }
        return null;
    }

    /**
     * Reads a numeric time zone offset of the form {@code hh:mm} or {@code hhmm}.
     * Must be called directly after the sign character has been consumed.
     * @return the signed offset in minutes
     */
    private int readTimeZoneOffset() {
        final int sign = string.charAt(pos - 1) == '+' ? 1 : -1;
        int offset = readNumber(2);
        skip(':');
        offset = offset * 60 + readNumber(2);
        return sign * offset;
    }

    /**
     * Skips over text in (possibly nested) parentheses. Stops at the matching
     * closing parenthesis or at the end of input, whichever comes first.
     * @return always true
     */
    private boolean skipParentheses() {
        int parenCount = 1;
        while (pos < length && parenCount != 0) {
            final char c = string.charAt(pos++);
            if (c == '(') {
                parenCount++;
            } else if (c == ')') {
                parenCount--;
            }
        }
        return true;
    }

    /** Returns the value used for a field the input did not specify: 1 for month and day, 0 otherwise. */
    private static int getDefaultValue(final int field) {
        switch (field) {
            case MONTH:
            case DAY:
                return 1;
            default:
                return 0;
        }
    }

    private static boolean isDay(final int n) {
        return 1 <= n && n <= 31;
    }

    private static boolean isMonth(final int n) {
        return 1 <= n && n <= 12;
    }

    private static boolean isHour(final int n) {
        return 0 <= n && n <= 24;
    }

    private static boolean isMinuteOrSecond(final int n) {
        return 0 <= n && n < 60;
    }

    private static boolean isMillisecond(final int n) {
        return 0 <= n && n < 1000;
    }

    /** Sets the month from a month name; fails if a month has already been set. */
    private boolean setMonth(final int m) {
        if (!isSet(MONTH)) {
            namedMonth = true;
            set(MONTH, m);
            return true;
        }
        return false;
    }

    /** Stores a number in the first unset date slot (year, month, day); fails if all are taken. */
    private boolean setDateField(final int n) {
        for (int field = YEAR; field != HOUR; field++) {
            if (!isSet(field)) {
                // no validation on legacy date fields
                set(field, n);
                return true;
            }
        }
        return false;
    }

    /** Stores a number in the first unset time slot (hour .. millisecond) if it is in range. */
    private boolean setTimeField(final int n) {
        for (int field = HOUR; field != TIMEZONE; field++) {
            if (!isSet(field)) {
                if (checkLegacyField(field, n)) {
                    set(field, n);
                    return true;
                }
                return false;
            }
        }
        return false;
    }

    /**
     * Sets the time zone offset in minutes. A numeric offset may replace a
     * previously set offset of zero (e.g. {@code GMT+0200}); any other attempt
     * to set the time zone twice fails.
     */
    private boolean setTimezone(final int offset, final boolean asNumericOffset) {
        if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) {
            set(TIMEZONE, offset);
            return true;
        }
        return false;
    }

    /**
     * Applies an AM/PM marker to the already parsed hour.
     * Hours greater than 12 are left untouched.
     * @param offset 0 for AM, 12 for PM
     * @return false if no hour has been parsed yet
     */
    private boolean setAmPm(final int offset) {
        if (!isSet(HOUR)) {
            return false;
        }
        final int hour = get(HOUR);
        if (hour >= 0 && hour <= 12) {
            // On a 12-hour clock "12" is the first hour of its half-day:
            // 12 AM is 00:xx and 12 PM is 12:xx. Reduce modulo 12 before adding the
            // offset so that 12 PM does not become hour 24 and 12 AM does not stay 12.
            set(HOUR, hour % 12 + offset);
        }
        return true;
    }

    /**
     * Validates the parsed fields, fills in defaults for unset ones, and converts
     * the month to a zero-based value.
     * @param strict true for ES5 format (time zone defaults to UTC, hour 24 only
     *               allowed as 24:00:00.000), false for legacy format (day/month/year
     *               order is guessed and two-digit years are expanded)
     * @return true if the fields form an acceptable date
     */
    private boolean patchResult(final boolean strict) {
        // sanity checks - make sure we have something
        if (!isSet(YEAR) && !isSet(HOUR)) {
            return false;
        }
        if (isSet(HOUR) && !isSet(MINUTE)) {
            return false;
        }
        // fill in default values for unset fields except timezone
        for (int field = YEAR; field <= TIMEZONE; field++) {
            if (get(field) == null) {
                if (field == TIMEZONE && !strict) {
                    // We only use UTC as default timezone for dates parsed complying with
                    // the format specified in ES5 15.9.1.15. Otherwise the slot is left empty
                    // and local timezone is used.
                    continue;
                }
                final int value = getDefaultValue(field);
                set(field, value);
            }
        }

        if (!strict) {
            // swap year, month, and day if it looks like the right thing to do
            if (isDay(get(YEAR))) {
                final int d = get(YEAR);
                set(YEAR, get(DAY));
                if (namedMonth) {
                    // d-m-y
                    set(DAY, d);
                } else {
                    // m-d-y
                    final int d2 = get(MONTH);
                    set(MONTH, d);
                    set(DAY, d2);
                }
            }
            // sanity checks now that we know what's what
            if (!isMonth(get(MONTH)) || !isDay(get(DAY))) {
                return false;
            }

            // add 1900 or 2000 to year if it's between 0 and 100
            final int year = get(YEAR);
            if (year >= 0 && year < 100) {
                set(YEAR, year >= 50 ? 1900 + year : 2000 + year);
            }
        } else {
            // 24 hour value is only allowed if all other time values are zero
            if (get(HOUR) == 24 &&
                    (get(MINUTE) != 0 || get(SECOND) != 0 || get(MILLISECOND) != 0)) {
                return false;
            }
        }

        // set month to 0-based
        set(MONTH, get(MONTH) - 1);
        return true;
    }

    /** Registers a name in the lookup table under its key. */
    private static void addName(final String str, final int type, final int value) {
        final Name name = new Name(str, type, value);
        names.put(name.key, name);
    }

    /**
     * A recognized word (weekday, month, AM/PM marker, time zone or time separator)
     * together with its category and associated numeric value.
     */
    private static class Name {
        final String name;
        final String key;
        final int value;
        final int type;

        static final int DAY_OF_WEEK    = -1;
        static final int MONTH_NAME     = 0;
        static final int AM_PM          = 1;
        static final int TIMEZONE_ID    = 2;
        static final int TIME_SEPARATOR = 3;

        Name(final String name, final int type, final int value) {
            assert name != null;
            assert name.equals(name.toLowerCase(Locale.ENGLISH));

            this.name = name;
            // use first three characters as lookup key
            this.key = name.substring(0, Math.min(3, name.length()));
            this.type = type;
            this.value = value;
        }

        /**
         * Checks, ignoring case, whether the given region of {@code str} equals
         * the first {@code len} characters of this name.
         */
        public boolean matches(final String str, final int offset, final int len) {
            return name.regionMatches(true, 0, str, offset, len);
        }

        @Override
        public String toString() {
            return name;
        }
    }

}