/*
 * Copyright (c) 2010, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package javafx.css;

import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;

import com.sun.javafx.css.parser.LexerState;
import com.sun.javafx.css.parser.Recognizer;
import com.sun.javafx.css.parser.Token;


/**
 * Hand-written lexer that turns the characters of a CSS stylesheet into
 * {@link Token}s.
 *
 * <p>Identifiers, hash names, functions, urls and numbers (with their units)
 * are recognized by a small state machine: {@link #createStateMap()} maps each
 * {@link LexerState} to the states reachable from it, and {@link #getToken()}
 * keeps following transitions until no reachable state accepts the current
 * character. Everything else (punctuation, strings, comments, white space,
 * {@code !important}) is handled by a plain {@code switch} on the character.
 *
 * <p>The lexer always holds one character of look-ahead in {@code ch}.
 * Usage: call {@link #setReader(Reader)} once, then {@link #nextToken()}
 * repeatedly until it returns the EOF token.
 */
final class CssLexer {

    // Token types produced by this lexer.
    static final int STRING = 10;
    static final int IDENT = 11;
    static final int FUNCTION = 12;
    static final int NUMBER = 13;
    static final int CM = 14;
    static final int EMS = 15;
    static final int EXS = 16;
    static final int IN = 17;
    static final int MM = 18;
    static final int PC = 19;
    static final int PT = 20;
    static final int PX = 21;
    static final int PERCENTAGE = 22;
    static final int DEG = 23;
    static final int GRAD = 24;
    static final int RAD = 25;
    static final int TURN = 26;
    static final int GREATER = 27;
    static final int LBRACE = 28;
    static final int RBRACE = 29;
    static final int SEMI = 30;
    static final int COLON = 31;
    static final int SOLIDUS = 32;
    static final int STAR = 33;
    static final int LPAREN = 34;
    static final int RPAREN = 35;
    static final int COMMA = 36;
    static final int HASH = 37;
    static final int DOT = 38;
    static final int IMPORTANT_SYM = 39;
    static final int WS = 40;
    static final int NL = 41;
    static final int FONT_FACE = 42;
    static final int URL = 43;
    static final int IMPORT = 44;
    static final int SECONDS = 45;
    static final int MS = 46;
    static final int AT_KEYWORD = 47;

    // Case-insensitive single-letter recognizers, used to spell out keywords
    // and units.
    private final Recognizer A = (c) -> c == 'a' || c == 'A';
    private final Recognizer B = (c) -> c == 'b' || c == 'B';
    private final Recognizer C = (c) -> c == 'c' || c == 'C';
    private final Recognizer D = (c) -> c == 'd' || c == 'D';
    private final Recognizer E = (c) -> c == 'e' || c == 'E';
    private final Recognizer F = (c) -> c == 'f' || c == 'F';
    private final Recognizer G = (c) -> c == 'g' || c == 'G';
    private final Recognizer H = (c) -> c == 'h' || c == 'H';
    private final Recognizer I = (c) -> c == 'i' || c == 'I';
    private final Recognizer J = (c) -> c == 'j' || c == 'J';
    private final Recognizer K = (c) -> c == 'k' || c == 'K';
    private final Recognizer L = (c) -> c == 'l' || c == 'L';
    private final Recognizer M = (c) -> c == 'm' || c == 'M';
    private final Recognizer N = (c) -> c == 'n' || c == 'N';
    private final Recognizer O = (c) -> c == 'o' || c == 'O';
    private final Recognizer P = (c) -> c == 'p' || c == 'P';
    private final Recognizer Q = (c) -> c == 'q' || c == 'Q';
    private final Recognizer R = (c) -> c == 'r' || c == 'R';
    private final Recognizer S = (c) -> c == 's' || c == 'S';
    private final Recognizer T = (c) -> c == 't' || c == 'T';
    private final Recognizer U = (c) -> c == 'u' || c == 'U';
    private final Recognizer V = (c) -> c == 'v' || c == 'V';
    private final Recognizer W = (c) -> c == 'w' || c == 'W';
    private final Recognizer X = (c) -> c == 'x' || c == 'X';
    private final Recognizer Y = (c) -> c == 'y' || c == 'Y';
    private final Recognizer Z = (c) -> c == 'z' || c == 'Z';
    private final Recognizer ALPHA = (c) -> ('a' <= c && c <= 'z') ||
                                           ('A' <= c && c <= 'Z');

    private final Recognizer NON_ASCII = (c) -> '\u0080' <= c && c <= '\uFFFF';

    private final Recognizer DOT_CHAR = (c) -> c == '.';
    private final Recognizer GREATER_CHAR = (c) -> c == '>';
    private final Recognizer LBRACE_CHAR = (c) -> c == '{';
    private final Recognizer RBRACE_CHAR = (c) -> c == '}';
    private final Recognizer SEMI_CHAR = (c) -> c == ';';
    private final Recognizer COLON_CHAR = (c) -> c == ':';
    private final Recognizer SOLIDUS_CHAR = (c) -> c == '/';
    private final Recognizer MINUS_CHAR = (c) -> c == '-';
    private final Recognizer PLUS_CHAR = (c) -> c == '+';
    private final Recognizer STAR_CHAR = (c) -> c == '*';
    private final Recognizer LPAREN_CHAR = (c) -> c == '(';
    private final Recognizer RPAREN_CHAR = (c) -> c == ')';
    private final Recognizer COMMA_CHAR = (c) -> c == ',';
    private final Recognizer UNDERSCORE_CHAR = (c) -> c == '_';
    private final Recognizer HASH_CHAR = (c) -> c == '#';

    private final Recognizer WS_CHARS = (c) -> c == ' '  ||
                                              c == '\t' ||
                                              c == '\r' ||
                                              c == '\n' ||
                                              c == '\f';
    private final Recognizer NL_CHARS = (c) -> (c == '\r' || c == '\n');

    private final Recognizer DIGIT = (c) -> '0' <= c && c <= '9';

    private final Recognizer HEX_DIGIT = (c) -> ('0' <= c && c <= '9') ||
                                               ('a' <= c && c <= 'f') ||
                                               ('A' <= c && c <= 'F');

    // The initial state accepts any character
    final LexerState initState = new LexerState("initState", null) {
        @Override public boolean accepts(int c) { return true; }
    };

    final LexerState hashState = new LexerState("hashState",
            HASH_CHAR
    );

    final LexerState minusState = new LexerState("minusState",
            MINUS_CHAR
    );

    final LexerState plusState = new LexerState("plusState",
            PLUS_CHAR
    );

    // The dot char is either just a dot or may be the start of a number
    final LexerState dotState = new LexerState(DOT, "dotState",
            DOT_CHAR
    );

    // [_a-z]|{nonascii}|{escape}
    final LexerState nmStartState = new LexerState(IDENT, "nmStartState",
            UNDERSCORE_CHAR, ALPHA
    );

    // nmchar [_a-z0-9-]|{nonascii}|{escape}
    final LexerState nmCharState = new LexerState(IDENT, "nmCharState",
            UNDERSCORE_CHAR, ALPHA, DIGIT, MINUS_CHAR
    );

    // same as nmchar, but need to differentiate between nmchar in an ident
    // and nmchar in a hash name, because the resulting token type differs
    final LexerState hashNameCharState = new LexerState(HASH, "hashNameCharState",
            UNDERSCORE_CHAR, ALPHA, DIGIT, MINUS_CHAR
    );

    // lparen after ident implies function; "url(" is special-cased so that
    // the whole url is consumed as a single URL token
    final LexerState lparenState = new LexerState(FUNCTION, "lparenState",
            LPAREN_CHAR
    ) {
        @Override public int getType() {

            if (text.indexOf("url(") == 0) {
                try {
                    return consumeUrl();
                } catch (IOException ioe) {
                    return Token.INVALID;
                }
            }
            return super.getType();
        }
    };


    // initial digits in a number
    final LexerState leadingDigitsState = new LexerState(NUMBER, "leadingDigitsState",
            DIGIT
    );

    // If the dot char follows leading digits, a plus or a minus, then it is
    // a decimal mark
    final LexerState decimalMarkState = new LexerState("decimalMarkState",
            DOT_CHAR
    );

    // digits following decimal mark
    final LexerState trailingDigitsState = new LexerState(NUMBER, "trailingDigitsState",
            DIGIT
    );

    // http://www.w3.org/TR/css3-values/
    final LexerState unitsState = new UnitsState();

    /**
     * Builds the transition table of the state machine.
     *
     * @return a map from each state to the states that may follow it, in the
     *         order in which they are tried
     */
    private Map<LexerState, LexerState[]> createStateMap() {

        Map<LexerState, LexerState[]> map = new HashMap<>();

        // initState -- [#] --> hashState
        // initState -- [-] --> minusState
        // initState -- [_a-z] --> nmStartState
        // initState -- [+] --> plusState
        // initState -- [0-9] --> leadingDigitsState
        // initState -- [.] --> dotState
        map.put(
                initState,
                new LexerState[] {
                    hashState,
                    minusState,
                    nmStartState,
                    plusState,
                    leadingDigitsState,
                    dotState
                }
        );

        // minus could be the start of an ident or a number
        // minusState -- [_a-z] --> nmStartState
        // minusState -- [0-9] --> leadingDigitsState
        // minusState -- [.] --> decimalMarkState
        map.put(
                minusState,
                new LexerState[] {
                    nmStartState,
                    leadingDigitsState,
                    decimalMarkState,
                }
        );

        //
        // # {name}
        // hash {nmchar}+
        // hashState -- [_a-z0-9-] --> hashNameCharState
        // hashNameCharState -- [_a-z0-9-] --> hashNameCharState
        //
        map.put(
                hashState,
                new LexerState[] {
                    hashNameCharState
                }
        );

        map.put(
                hashNameCharState,
                new LexerState[] {
                    hashNameCharState,
                }
        );


        //
        // {ident}
        // ident '-'? {nmchar}+
        // nmStartState -- [_a-z0-9-] --> nmCharState
        // nmCharState -- [_a-z0-9-] --> nmCharState
        // nmCharState -- [(] --> lparenState
        //
        map.put(
                nmStartState,
                new LexerState[] {
                    nmCharState
                }
        );

        map.put(
                nmCharState,
                new LexerState[] {
                    nmCharState,
                    lparenState
                }
        );

        // from +/- state, next state must be a digit or a dot
        map.put(
                plusState,
                new LexerState[] {
                    leadingDigitsState,
                    decimalMarkState
                }
        );

        // from leadingDigitsState, next state must be
        // another digit, a decimal mark, or units
        map.put(
                leadingDigitsState,
                new LexerState[] {
                    leadingDigitsState,
                    decimalMarkState,
                    unitsState
                }
        );

        // from decimal mark, next state must be a digit.
        // Need to map both dotState and decimalMarkState
        // since dot might be the first character and would
        // not be seen as a decimal point.
        map.put(
                dotState,
                new LexerState[] {
                    trailingDigitsState
                }
        );

        map.put(
                decimalMarkState,
                new LexerState[] {
                    trailingDigitsState
                }
        );

        // from trailingDigitsState, next state must be another digit or units
        map.put(
                trailingDigitsState,
                new LexerState[] {
                    trailingDigitsState,
                    unitsState,
                }
        );

        // UnitsState stays in UnitsState
        map.put(
                unitsState,
                new LexerState[] {
                    unitsState
                }
        );

        return map;
    }

    /** Creates a lexer with no input; call {@link #setReader(Reader)} before use. */
    CssLexer() {
        this.stateMap = createStateMap();
        this.text = new StringBuilder(64);
        this.currentState = initState;
    }

    /**
     * Sets the input to read from, resets all position tracking and reads the
     * first look-ahead character. If that first read fails, the lexer is
     * primed to return the EOF token.
     *
     * @param reader the source of the stylesheet characters
     */
    void setReader(Reader reader) {
        this.reader = reader;
        lastc = -1;
        pos = offset = 0;
        line = 1;
        this.currentState = initState;
        this.token = null;
        try {
            this.ch = readChar();
        } catch (IOException ioe) {
            token = Token.EOF_TOKEN;
        }
    }

    /**
     * Scans an {@code !important} symbol; on entry {@code ch} is the
     * {@code '!'}.
     *
     * @return an IMPORTANT_SYM token on success; an INVALID token if a lone
     *         {@code '/'} follows the {@code '!'}; a SKIP token (positioned
     *         on the next {@code ';'} or <code>'}'</code>) if the keyword
     *         does not match; or the EOF token
     * @throws IOException if reading from the underlying reader fails
     */
    private Token scanImportant() throws IOException {
        // CSS 2.1 grammar for important_sym
        // "!"({w}|{comment})*{I}{M}{P}{O}{R}{T}{A}{N}{T}
        final Recognizer[] important_sym =
                new Recognizer[] { I, M, P, O, R, T, A, N, T };

        text.append((char)ch);

        // get past the '!'
        ch = readChar();

        while (true) {

            switch (ch) {

                case Token.EOF:
                    token = Token.EOF_TOKEN;
                    return token;

                case '/':
                    ch = readChar();
                    if (ch == '*') skipComment();
                    else if (ch == '/') skipEOL();
                    else {
                        text.append('/');
                        // do not put the EOF marker into the token text
                        if (ch != Token.EOF) text.append((char)ch);
                        int temp = offset;
                        offset = pos;
                        return new Token(Token.INVALID, text.toString(), line, temp);
                    }
                    break;

                case ' ':
                case '\t':
                case '\r':
                case '\n':
                case '\f':
                    ch = readChar();
                    break;

                default: {
                    // Test each character BEFORE consuming it, so that a
                    // mismatching ';' or '}' is still there to terminate the
                    // recovery scan below instead of being swallowed.
                    int matched = 0;
                    while (matched < important_sym.length
                            && important_sym[matched].recognize(ch)) {
                        text.append((char)ch);
                        ch = readChar();
                        matched++;
                    }

                    if (matched == important_sym.length) {
                        final int temp = offset;
                        offset = pos-1; // will have read one char too many
                        return new Token(IMPORTANT_SYM, "!important", line, temp);
                    }

                    // not "important": skip the rest of the declaration
                    while (ch != ';' &&
                           ch != '}' &&
                           ch != Token.EOF) {
                        ch = readChar();
                    }
                    if (ch != Token.EOF) {
                        final int temp = offset;
                        offset = pos-1; // will have read one char too many
                        return new Token(Token.SKIP, text.toString(), line, temp);
                    }
                    // latch EOF like every other end-of-input path
                    token = Token.EOF_TOKEN;
                    return token;
                }
            }
        }
    }

    /**
     * Consumes what follows a backslash inside a url. On entry {@code ch} is
     * the backslash. An escaped newline sequence is dropped; any other
     * escaped character is appended to the url text verbatim; at EOF nothing
     * is appended.
     *
     * Note: this does not implement the algorithm for consuming hex-digits.
     *
     * @throws IOException if reading from the underlying reader fails
     */
    private void consumeUrlEscape() throws IOException {

        ch = readChar();

        if (NL_CHARS.recognize(ch)) {

            // consume newline
            while (NL_CHARS.recognize(ch)) {
                ch = readChar();
            }

        } else if (ch != Token.EOF) {
            // if EOF, do nothing
            text.append((char)ch);
            ch = readChar();
        }
    }

    // http://www.ietf.org/rfc/rfc3986
    // http://www.w3.org/TR/2011/REC-CSS2-20110607/syndata.html#uri
    // http://www.w3.org/TR/css3-syntax/#consume-a-url-token
    /**
     * Consumes the body of a {@code url(...)} token. On entry "url(" has been
     * read and {@code ch} is the first character after the paren. On return
     * the text buffer holds the url itself (without quotes or parens).
     *
     * @return {@link #URL} for a well-formed url (also when the input ends
     *         before the closing paren), {@code Token.INVALID} for a
     *         malformed one (input is skipped up to and including the closing
     *         paren), or {@code Token.EOF} if input ends before the url
     *         starts or while skipping a malformed url
     * @throws IOException if reading from the underlying reader fails
     */
    private int consumeUrl() throws IOException {

        text.delete(0, text.length());

        // skip initial white space
        while (WS_CHARS.recognize(ch)) {
            ch = readChar();
        }

        if (ch == Token.EOF) {
            return Token.EOF;
        }

        if (ch == '\'' || ch == '"') {

            int endQuote = ch;

            ch = readChar();

            // consume the string
            while (ch != endQuote) {

                if (ch == Token.EOF) {
                    break;
                }

                // un-escaped newline is an error
                if (NL_CHARS.recognize(ch)) {
                    break;
                }

                // handle escaped char
                if (ch == '\\') {
                    consumeUrlEscape();
                    continue;
                }

                text.append((char)ch);
                ch = readChar();

            }

            if (ch == endQuote) {

                ch = readChar();
                while (WS_CHARS.recognize(ch)) {
                    ch = readChar();
                }

                // After consuming white-space, the char has to be rparen or EOF. Error otherwise.
                if (ch == ')') {
                    // consume the rparen
                    ch = readChar();
                    return URL;
                }

                if (ch == Token.EOF) {
                    return URL;
                }
            }

        } else {

            // The first character is handled by the loop like any other, so
            // that an empty "url()" ends at its own rparen and a leading
            // escape, quote or '(' is treated the same as a later one.
            while (true) {

                while (WS_CHARS.recognize(ch)) {
                    ch = readChar();
                }

                if (ch == ')') {
                    // consume the rparen
                    ch = readChar();
                    return URL;
                }

                if (ch == Token.EOF) {
                    return URL;
                }

                // handle escaped char
                if (ch == '\\') {
                    consumeUrlEscape();
                    continue;
                }

                if (ch == '\'' || ch == '"' || ch == '(') {
                    break;
                }

                text.append((char)ch);
                ch = readChar();

            }
        }

        // if we get to here, then the token is bad
        // consume up to an un-escaped rparen or eof
        while (true) {

            if (ch == Token.EOF) {
                return Token.EOF;
            }

            if (ch == ')') {
                ch = readChar();
                return Token.INVALID;
            }

            if (ch == '\\') {
                // skip the escaped character so "\)" does not end the url
                ch = readChar();
                if (ch == Token.EOF) {
                    return Token.EOF;
                }
            }

            ch = readChar();
        }

    }

    /**
     * State that recognizes the unit suffix of a number. Each call to
     * {@link #accepts(int)} narrows the set of candidate units, which is kept
     * as a bit mask with one bit per row of {@code units}; {@link #getType()}
     * reports the result and resets the state for the next number.
     */
    private class UnitsState extends LexerState {

        private final Recognizer[][] units = {

            // TODO: all units from http://www.w3.org/TR/css3-values/
            // If units are added, getType and unitsMask must be updated!
            { C, M },
            { D, E, G },
            { E, M },
            { E, X },
            { G, R, A, D },
            { I, N },
            { M, M },
            { M, S },
            { P, C },
            { P, T },
            { P, X },
            { R, A, D },
            { S },
            { T, U, R, N },
            { (c) -> c == '%'}
        };

        // One bit per unit
        private int unitsMask = 0x7FFF;

        // Offset into inner array of units
        private int index = -1;

        UnitsState() {
            super(-1, "UnitsState", null);
        }

        /**
         * Returns the token type of the unit that was read, or
         * {@code Token.INVALID} if the characters do not spell exactly one
         * known unit. Resets this state as a side effect.
         */
        @Override
        public int getType() {

            int type = Token.INVALID;

            // Must keep this in sync with units array.
            // Small switch will be faster than Math.log(oldMask)/Math.log(2)
            switch (unitsMask) {
                case 0x1: type = CM; break;
                case 0x2: type = DEG; break;
                case 0x4: type = EMS; break;
                case 0x8: type = EXS; break;
                case 0x10: type = GRAD; break;
                case 0x20: type = IN; break;
                case 0x40: type = MM; break;
                case 0x80: type = MS; break;
                case 0x100: type = PC; break;
                case 0x200: type = PT; break;
                case 0x400: type = PX; break;
                case 0x800: type = RAD; break;
                case 0x1000: type = SECONDS; break;
                case 0x2000: type = TURN; break;
                case 0x4000: type = PERCENTAGE; break;
                default: type = Token.INVALID;
            }

            // A single surviving candidate only counts if all of its
            // characters were read; otherwise a mere prefix such as "10c" or
            // "10gra" would be reported as CM or GRAD.
            if (type != Token.INVALID) {
                final int unit = Integer.numberOfTrailingZeros(unitsMask);
                if (index != units[unit].length - 1) {
                    type = Token.INVALID;
                }
            }

            // reset
            unitsMask = 0x7fff;
            index = -1;

            return type;
        }

        @Override
        public boolean accepts(int c) {

            // Ensure that something bogus like '10xyzzy' is
            // consumed as a token by only returning false
            // if the char is not alpha or %
            if (!ALPHA.recognize(c) && c != '%') {
                return false;
            }

            // If unitsMask is zero, then we've already figured out that
            // this is an invalid token, but we want to accept c so that
            // '10xyzzy' is consumed as a token, albeit an invalid one.
            if (unitsMask == 0) return true;

            index += 1;

            for (int n = 0; n < units.length; n++) {

                final int u = 1 << n;

                // the unit at this index already failed. Move on.
                if ((unitsMask & u) == 0) continue;

                if ((index >= units[n].length) || !(units[n][index].recognize(c))) {
                    // not a match, turn off this bit
                    unitsMask &= ~u;
                }

            }


            return true;
        }

    }

    /**
     * Skips the remainder of a block comment. On entry {@code ch} is the
     * {@code '*'} that opened the comment; on return {@code ch} is the first
     * character after the closing delimiter, or -1 if the input ended first.
     *
     * @throws IOException if reading from the underlying reader fails
     */
    private void skipComment() throws IOException {
        while (ch != -1) {
            if (ch == '*') {
                ch = readChar();
                if (ch == '/') {
                    offset = pos;
                    ch = readChar();
                    break;
                }
            } else {
                ch = readChar();
            }
        }
    }

    /**
     * Skips the remainder of a {@code //} line comment. On return {@code ch}
     * is either the {@code '\n'} that ends the line, the first character
     * after a bare {@code '\r'}, or -1 at end of input.
     *
     * @throws IOException if reading from the underlying reader fails
     */
    private void skipEOL() throws IOException {

        // The previous character has to be tracked inside the loop,
        // otherwise a bare CR is never seen as the end of the line.
        int prev = ch;

        while (ch != -1) {

            ch = readChar();

            // EOL is cr, lf, or crlf
            if ((ch == '\n') || (prev == '\r' && ch != '\n')) {
                break;
            }

            prev = ch;
        }

    }

    private int pos = 0;
    private int offset = 0;
    private int line = 1;
    private int lastc = -1;

    /**
     * Reads the next character and maintains the line/column bookkeeping.
     *
     * @return the character read, or -1 at end of input
     * @throws IOException if reading from the underlying reader fails
     */
    private int readChar() throws IOException {

        int c = reader.read();

        // only reset line and pos counters after having read a NL since
        // a NL token is created after the readChar
        if (lastc == '\n' || (lastc == '\r' && c != '\n')) {
            // set pos to 1 since we've already read the first char of the new line
            pos = 1;
            offset = 0;
            line++;
        } else {
            pos++;
        }

        lastc = c;
        return c;
    }

    /**
     * Returns the next token of the input, dropping skip tokens. Once the end
     * of the input is reached, every further call returns the EOF token.
     *
     * @return the next token
     */
    Token nextToken() {

        Token tok = null;
        if (token != null) {
            tok = token;
            // the EOF token is sticky; anything else is handed out once
            if (token.getType() != Token.EOF) token = null;
        } else {
            do {
                tok = getToken();
            } while (tok != null &&
                    // tok.getType() != Token.EOF &&
                    Token.SKIP_TOKEN.equals(tok));
        }

        // reset text buffer and currentState
        text.delete(0, text.length());
        currentState = initState;

        return tok;
    }

    /**
     * Scans a single token starting at the look-ahead character. The state
     * machine is tried first; if it produces nothing, the character is
     * handled by the switch below. An {@link IOException} from the reader is
     * reported as end of input.
     *
     * @return the token that was scanned
     */
    private Token getToken() {

        try {
            while (true) {
                charNotConsumed = false;

                final LexerState[] reachableStates =
                        currentState != null ? stateMap.get(currentState) : null;

                final int max = reachableStates != null ? reachableStates.length : 0;

                LexerState newState = null;
                for (int n = 0; n < max && newState == null; n++) {
                    final LexerState reachableState = reachableStates[n];
                    if (reachableState.accepts(ch)) {
                        newState = reachableState;
                    }
                }

                if (newState != null) {

                    // Some reachable state was reached. Keep going until
                    // the char isn't accepted by any state
                    currentState = newState;
                    text.append((char)ch);
                    ch = readChar();
                    continue;

                } else {

                    // If none of the reachable states accepts the char,
                    // then see if there is a token.

                    final int type = currentState != null ? currentState.getType() : Token.INVALID;

                    //
                    // If the token is INVALID and
                    // the currentState is something other than initState, then
                    // there is an error, so return INVALID.
                    //
                    if (type != Token.INVALID ||
                            !currentState.equals(initState)) {

                        final String str = text.toString();
                        Token tok = new Token(type, str, line, offset);
                        // because the next char has already been read,
                        // the next token starts at pos-1
                        offset = pos-1;

                        // return here, but the next char has already been read.
                        return tok;

                    }
                }

                // The char wasn't accepted and there was no previous token.
                switch (ch) {

                    case -1:
                        token = Token.EOF_TOKEN;
                        return token;

                    case '"':
                    case '\'':

                        text.append((char)ch);
                        final int endq = ch;
                        while ((ch = readChar()) != -1) {
                            text.append((char)ch);
                            if (ch == endq) break;
                        }

                        if (ch != -1) {
                            token = new Token(STRING, text.toString(), line, offset);
                            offset = pos;
                        } else {
                            // unterminated string
                            token = new Token(Token.INVALID, text.toString(), line, offset);
                            offset = pos;
                        }
                        break;

                    case '/':
                        ch = readChar();
                        if (ch == '*') {
                            skipComment();
                            if (ch != -1) {
                                continue;
                            } else {
                                token = Token.EOF_TOKEN;
                                return token;
                            }
                        } else if (ch == '/') {
                            skipEOL();
                            if (ch != -1) {
                                continue;
                            } else {
                                token = Token.EOF_TOKEN;
                                return token;
                            }
                        } else {
                            // not a comment - a SOLIDUS
                            token = new Token(SOLIDUS, "/", line, offset);
                            offset = pos;
                            // the char after the '/' still has to be scanned
                            charNotConsumed = true;
                        }
                        break;

                    case '>':

                        token = new Token(GREATER, ">", line, offset);
                        offset = pos;
                        break;

                    case '{':
                        token = new Token(LBRACE, "{", line, offset);
                        offset = pos;
                        break;

                    case '}':
                        token = new Token(RBRACE, "}", line, offset);
                        offset = pos;
                        break;

                    case ';':
                        token = new Token(SEMI, ";", line, offset);
                        offset = pos;
                        break;

                    case ':':
                        token = new Token(COLON, ":", line, offset);
                        offset = pos;
                        break;

                    case '*':
                        token = new Token(STAR, "*", line, offset);
                        offset = pos;
                        break;

                    case '(':
                        token = new Token(LPAREN, "(", line, offset);
                        offset = pos;
                        break;

                    case ')':
                        token = new Token(RPAREN, ")", line, offset);
                        offset = pos;
                        break;

                    case ',':
                        token = new Token(COMMA, ",", line, offset);
                        offset = pos;
                        break;

                    case '.':
                        token = new Token(DOT, ".", line, offset);
                        offset = pos;
                        break;

                    case ' ':
                    case '\t':
                    case '\f':
                        token = new Token(WS, Character.toString((char)ch), line, offset);
                        offset = pos;
                        break;


                    case '\r':
                        token = new Token(NL, "\\r", line, offset);
                        // offset and pos are reset on next readChar

                        ch = readChar();
                        if (ch == '\n') {
                            token = new Token(NL, "\\r\\n", line, offset);
                            // offset and pos are reset on next readChar
                        } else {
                            // already read the next character, so
                            // return the NL token here (avoid the readChar
                            // at the end of the loop below)
                            final Token tok = token;
                            token = (ch == -1) ? Token.EOF_TOKEN : null;
                            return tok;
                        }
                        break;

                    case '\n':
                        token = new Token(NL, "\\n", line, offset);
                        // offset and pos are reset on next readChar
                        break;

                    case '!':
                        return scanImportant();

                    case '@':
                        token = new Token(AT_KEYWORD, "@", line, offset);
                        offset = pos;
                        break;

                    default:
                        token = new Token(Token.INVALID, Character.toString((char)ch), line, offset);
                        offset = pos;
                        break;
                }

                if (token == null) {
                    token = new Token(Token.INVALID, null, line, offset);
                    offset = pos;
                } else if (token.getType() == Token.EOF) {
                    return token;
                }

                // advance the look-ahead past the char that formed the token
                if (ch != -1 && !charNotConsumed) ch = readChar();

                final Token tok = token;
                token = null;
                return tok;
            }
        } catch (IOException ioe) {
            token = Token.EOF_TOKEN;
            return token;
        }
    }

    // one character of look-ahead; -1 at end of input
    private int ch;
    // set when the look-ahead char has not been used by the current token
    private boolean charNotConsumed = false;
    private Reader reader;
    // token waiting to be returned by nextToken (sticky once it is EOF)
    private Token token;
    private final Map<LexerState, LexerState[]> stateMap;
    private LexerState currentState;
    // text of the token being scanned
    private final StringBuilder text;

}