1 /*
   2  * Copyright (c) 2010, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.javafx.css.parser;
  27 
  28 import java.io.IOException;
  29 import java.io.Reader;
  30 import java.util.HashMap;
  31 import java.util.Map;
  32 
  33 
  34 final class CSSLexer {
  35 
  36     /* Lazy instantiation */
  37     private static class InstanceHolder {
  38         final static CSSLexer INSTANCE = new CSSLexer();
  39     }
  40 
  41     public static CSSLexer getInstance() {
  42         return InstanceHolder.INSTANCE;
  43     }
  44 
  45     final static int STRING = 10;
  46     final static int IDENT = 11;
  47     final static int FUNCTION = 12;
  48     final static int NUMBER = 13;
  49     final static int CM = 14;
  50     final static int EMS = 15;
  51     final static int EXS = 16;
  52     final static int IN = 17;
  53     final static int MM = 18;
  54     final static int PC = 19;
  55     final static int PT = 20;
  56     final static int PX = 21;
  57     final static int PERCENTAGE = 22;
  58     final static int DEG = 23;
  59     final static int GRAD = 24;
  60     final static int RAD = 25;
  61     final static int TURN = 26;
  62     final static int GREATER = 27;
  63     final static int LBRACE = 28;
  64     final static int RBRACE = 29;
  65     final static int SEMI = 30;
  66     final static int COLON = 31;
  67     final static int SOLIDUS = 32;
  68     final static int STAR = 33;
  69     final static int LPAREN = 34;
  70     final static int RPAREN = 35;
  71     final static int COMMA = 36;
  72     final static int HASH = 37;
  73     final static int DOT = 38;
  74     final static int IMPORTANT_SYM = 39;
  75     final static int WS = 40;
  76     final static int NL = 41;
  77     final static int FONT_FACE = 42;
  78     final static int URL = 43;
  79     final static int IMPORT = 44;
  80     final static int SECONDS = 45;
  81     final static int MS = 46;
  82     final static int AT_KEYWORD = 47;
  83 
  84     private final Recognizer A = (c) -> c == 'a' || c == 'A';
  85     private final Recognizer B = (c) -> c == 'b' || c == 'B';
  86     private final Recognizer C = (c) -> c == 'c' || c == 'C';
  87     private final Recognizer D = (c) -> c == 'd' || c == 'D';
  88     private final Recognizer E = (c) -> c == 'e' || c == 'E';
  89     private final Recognizer F = (c) -> c == 'f' || c == 'F';
  90     private final Recognizer G = (c) -> c == 'g' || c == 'G';
  91     private final Recognizer H = (c) -> c == 'h' || c == 'H';
  92     private final Recognizer I = (c) -> c == 'i' || c == 'I';
  93     private final Recognizer J = (c) -> c == 'j' || c == 'J';
  94     private final Recognizer K = (c) -> c == 'k' || c == 'K';
  95     private final Recognizer L = (c) -> c == 'l' || c == 'L';
  96     private final Recognizer M = (c) -> c == 'm' || c == 'M';
  97     private final Recognizer N = (c) -> c == 'n' || c == 'N';
  98     private final Recognizer O = (c) -> c == 'o' || c == 'O';
  99     private final Recognizer P = (c) -> c == 'p' || c == 'P';
 100     private final Recognizer Q = (c) -> c == 'q' || c == 'Q';
 101     private final Recognizer R = (c) -> c == 'r' || c == 'R';
 102     private final Recognizer S = (c) -> c == 's' || c == 'S';
 103     private final Recognizer T = (c) -> c == 't' || c == 'T';
 104     private final Recognizer U = (c) -> c == 'u' || c == 'U';
 105     private final Recognizer V = (c) -> c == 'v' || c == 'V';
 106     private final Recognizer W = (c) -> c == 'w' || c == 'W';
 107     private final Recognizer X = (c) -> c == 'x' || c == 'X';
 108     private final Recognizer Y = (c) -> c == 'y' || c == 'Y';
 109     private final Recognizer Z = (c) -> c == 'z' || c == 'Z';
 110     private final Recognizer ALPHA =  (c) -> ('a' <= c && c <= 'z') ||
 111            ('A' <= c && c <= 'Z');
 112 
 113     private final Recognizer NON_ASCII = (c) -> '\u0080' <= c && c <= '\uFFFF';
 114 
 115     private final Recognizer DOT_CHAR =        (c) -> c == '.';
 116     private final Recognizer GREATER_CHAR =    (c) -> c == '>';
 117     private final Recognizer LBRACE_CHAR =     (c) -> c == '{';
 118     private final Recognizer RBRACE_CHAR =     (c) -> c == '}';
 119     private final Recognizer SEMI_CHAR  =      (c) -> c == ';';
 120     private final Recognizer COLON_CHAR =      (c) -> c == ':';
 121     private final Recognizer SOLIDUS_CHAR =    (c) -> c == '/';
 122     private final Recognizer MINUS_CHAR =      (c) -> c == '-';
 123     private final Recognizer PLUS_CHAR =       (c) -> c == '+';
 124     private final Recognizer STAR_CHAR =       (c) -> c == '*';
 125     private final Recognizer LPAREN_CHAR =     (c) -> c == '(';
 126     private final Recognizer RPAREN_CHAR =     (c) -> c == ')';
 127     private final Recognizer COMMA_CHAR =      (c) -> c == ',';
 128     private final Recognizer UNDERSCORE_CHAR = (c) -> c == '_';
 129     private final Recognizer HASH_CHAR =       (c) -> c == '#';
 130 
 131     private final Recognizer WS_CHARS = (c) -> c == ' '  ||
 132            c == '\t' ||
 133            c == '\r' ||
 134            c == '\n' ||
 135            c == '\f';
 136     private final Recognizer NL_CHARS = (c) -> (c == '\r' || c == '\n');
 137 
 138     private final Recognizer DIGIT = (c) -> '0' <= c && c <= '9';
 139 
 140     private final Recognizer HEX_DIGIT = (c) -> ('0' <= c && c <= '9') ||
 141            ('a' <= c && c <= 'f') ||
 142            ('A' <= c && c <= 'F');
 143 
 144     // The initial accepts any character
 145     final LexerState initState = new LexerState("initState", null) {
 146         @Override public boolean accepts(int c) { return true; }
 147     };
 148 
 149     final LexerState hashState = new LexerState("hashState",
 150         HASH_CHAR
 151     );
 152 
 153     final LexerState minusState = new LexerState("minusState",
 154         MINUS_CHAR
 155     );
 156 
 157     final LexerState plusState = new LexerState("plusState",
 158         PLUS_CHAR
 159     );
 160 
 161     // The dot char is either just a dot or may be the start of a number
 162     final LexerState dotState = new LexerState(DOT, "dotState",
 163         DOT_CHAR
 164     );
 165 
 166     // [_a-z]|{nonascii}|{escape}
 167     final LexerState nmStartState = new LexerState(IDENT, "nmStartState",
 168         UNDERSCORE_CHAR, ALPHA
 169     );
 170 
 171     // nmchar           [_a-z0-9-]|{nonascii}|{escape}
 172     final LexerState nmCharState = new LexerState(IDENT, "nmCharState",
 173         UNDERSCORE_CHAR, ALPHA, DIGIT, MINUS_CHAR
 174     );
 175 
 176     // same as nmchar, but need to differentiate between nmchar in ident and
 177     // nmchar in
 178     final LexerState hashNameCharState = new LexerState(HASH, "hashNameCharState",
 179         UNDERSCORE_CHAR, ALPHA, DIGIT, MINUS_CHAR
 180     );
 181 
 182     // lparen after ident implies function
 183     final LexerState lparenState = new LexerState(FUNCTION, "lparenState",
 184         LPAREN_CHAR
 185     ) {
 186         @Override public int getType() {
 187 
 188             if (text.indexOf("url(") == 0) {
 189                 try {
 190                     return consumeUrl();
 191                 } catch (IOException ioe) {
 192                     return Token.INVALID;
 193                 }
 194             }
 195             return super.getType();
 196         }
 197     };
 198 
 199 
 200     // initial digits in a number
 201     final LexerState leadingDigitsState = new LexerState(NUMBER,"leadingDigitsState",
 202         DIGIT
 203     );
 204 
 205     // If the dot char follows leading digits, a plus or a minus, then it is
 206     // a decimal mark
 207     final LexerState decimalMarkState = new LexerState("decimalMarkState",
 208         DOT_CHAR
 209     );
 210 
 211     // digits following decimal mark
 212     final LexerState trailingDigitsState = new LexerState(NUMBER,"trailingDigitsState",
 213         DIGIT
 214     );
 215     
 216     // http://www.w3.org/TR/css3-values/
 217     final LexerState unitsState = new UnitsState();
 218 
 219     private Map<LexerState, LexerState[]> createStateMap() {
 220 
 221         Map<LexerState, LexerState[]> map =
 222                 new HashMap<LexerState, LexerState[]>();
 223 
 224         // initState -- [#] --> hashState
 225         // initState -- [-] --> minusState
 226         // initState -- [+] --> plusState
 227         // initState -- [_a-z] --> nmStartState
 228         // initState -- [0-9] --> leadingDigitsState
 229         // initState -- [.] --> dotState
 230         map.put(
 231                 initState,
 232                 new LexerState[] {
 233                     hashState,
 234                     minusState,
 235                     nmStartState,
 236                     plusState,
 237                     minusState,
 238                     leadingDigitsState,
 239                     dotState
 240                 }
 241         );
 242 
 243         // minus could be the start of an ident or a number
 244         // minusState -- [_a-z] --> nmStartState
 245         // minusState -- [0-9] --> leadingDigitsState
 246         // minusState -- [.] --> decimalMarkState
 247         map.put(
 248                 minusState,
 249                 new LexerState[] {
 250                     nmStartState,
 251                     leadingDigitsState,
 252                     decimalMarkState,
 253                 }
 254         );
 255 
 256         //
 257         // # {name}
 258         // hash {nmchar}+
 259         // hashState -- [_a-z0-9-] --> nmCharState
 260         // nmCharState -- [_a-z0-9-] --> nmCharState
 261         //
 262         map.put(
 263                 hashState,
 264                 new LexerState[] {
 265                     hashNameCharState
 266                 }
 267         );
 268 
 269         map.put(
 270                 hashNameCharState,
 271                 new LexerState[] {
 272                     hashNameCharState,
 273                 }
 274         );
 275 
 276 
 277         //
 278         // {ident}
 279         // ident '-'? {nmchar}+
 280         // nmStartState -- [_a-z0-9-] --> nmCharState
 281         // nmCharState -- [_a-z0-9-] --> nmCharState
 282         // nmCharState -- [(] --> lparenState
 283         //
 284         map.put(
 285                 nmStartState,
 286                 new LexerState[] {
 287                     nmCharState
 288                 }
 289         );
 290 
 291         map.put(
 292                 nmCharState,
 293                 new LexerState[] {
 294                     nmCharState,
 295                     lparenState
 296                 }
 297         );
 298 
 299         // from +/- state, next state must be a digit or a dot
 300         map.put(
 301                 plusState,
 302                 new LexerState[] {
 303                     leadingDigitsState,
 304                     decimalMarkState
 305                 }
 306         );
 307 
 308         // from leadingDigitsState, next state must be
 309         // another digit, a decimal mark, or units
 310         map.put(
 311                 leadingDigitsState,
 312                 new LexerState[] {
 313                     leadingDigitsState,
 314                     decimalMarkState,
 315                     unitsState
 316                 }
 317         );
 318 
 319         // from decimal mark, next state must be a digit.
 320         // Need to map both dotState and decimalMarkState
 321         // since dot might be the first character and would
 322         // not be seen as a decimal point.
 323         map.put(
 324                 dotState,
 325                 new LexerState[] {
 326                     trailingDigitsState
 327                 }
 328         );
 329 
 330         map.put(
 331                 decimalMarkState,
 332                 new LexerState[] {
 333                     trailingDigitsState
 334                 }
 335         );
 336 
 337         // from trailingDigitsState, next state must be another digit or units
 338         map.put(
 339                 trailingDigitsState,
 340                 new LexerState[] {
 341                     trailingDigitsState,
 342                     unitsState,
 343                 }
 344         );
 345 
 346         // UnitsState stays in UnitsState
 347         map.put(
 348                 unitsState,
 349                 new LexerState[] {
 350                     unitsState
 351                 }
 352         );
 353 
 354         return map;
 355     }
 356 
 357     CSSLexer() {
 358         this.stateMap = createStateMap();
 359         this.text = new StringBuilder(64);
 360         this.currentState = initState;
 361     }
 362 
 363     public void setReader(Reader reader) {
 364         this.reader = reader;
 365         lastc = -1;
 366         pos = offset = 0;
 367         line = 1;
 368         this.currentState = initState;
 369         this.token = null;
 370         try {
 371             this.ch = readChar();
 372         } catch (IOException ioe) {
 373             token = Token.EOF_TOKEN;
 374         }
 375     }
 376 
 377     private Token scanImportant()  throws IOException{
 378         // CSS 2.1 grammar for important_sym
 379         // "!"({w}|{comment})*{I}{M}{P}{O}{R}{T}{A}{N}{T}
 380         final Recognizer[] important_sym =
 381                 new Recognizer[] { I, M, P, O, R, T, A, N, T };
 382         int current = 0;
 383         
 384         text.append((char)ch);
 385         
 386         // get past the '!'
 387         ch = readChar();
 388        
 389         while(true) {
 390             
 391             switch (ch) {
 392 
 393                 case Token.EOF:
 394                     token = Token.EOF_TOKEN;
 395                     return token;
 396 
 397                 case '/':                    
 398                     ch = readChar();
 399                     if (ch == '*') skipComment();
 400                     else if (ch == '/') skipEOL();
 401                     else {
 402                         text.append('/').append((char)ch);
 403                         int temp = offset;
 404                         offset = pos;
 405                         return new Token(Token.INVALID, text.toString(), line, temp);
 406                     }
 407                     break;
 408 
 409                 case ' ':
 410                 case '\t':
 411                 case '\r':
 412                 case '\n':
 413                 case '\f':
 414                     ch = readChar();
 415                     break;
 416 
 417                 default:
 418                     boolean accepted = true;
 419                     while(accepted && current < important_sym.length) {
 420                         accepted = important_sym[current++].recognize(ch);
 421                         text.append((char)ch);
 422                         ch = readChar();
 423                     }
 424                     if (accepted) {
 425                         final int temp = offset;
 426                         offset = pos-1; // will have read one char too many
 427                         return new Token(IMPORTANT_SYM, "!important", line, temp);
 428                     } else {
 429                         while (ch != ';' &&
 430                                ch != '}' &&
 431                                ch != Token.EOF) {
 432                             ch = readChar();
 433                         }
 434                         if (ch != Token.EOF) {
 435                             final int temp = offset;
 436                             offset = pos-1; // will have read one char too many
 437                             return new Token(Token.SKIP, text.toString(), line, temp);
 438                         } else {
 439                             return Token.EOF_TOKEN;
 440                         }
 441                     }
 442             }
 443         }
 444     }
 445 
 446     // http://www.ietf.org/rfc/rfc3986
 447     // http://www.w3.org/TR/2011/REC-CSS2-20110607/syndata.html#uri
 448     // http://www.w3.org/TR/css3-syntax/#consume-a-url-token
 449     private int consumeUrl() throws IOException {
 450 
 451         text.delete(0, text.length());
 452 
 453         // skip initial white space
 454         while (WS_CHARS.recognize(ch)) {
 455             ch = readChar();
 456         }
 457 
 458         if (ch == Token.EOF) {
 459             return Token.EOF;
 460         }
 461 
 462         if (ch == '\'' || ch == '"') {
 463 
 464             int endQuote = ch;
 465 
 466             ch = readChar();
 467 
 468             // consume the string
 469             while (ch != endQuote) {
 470 
 471                 if (ch == Token.EOF) {
 472                     break;
 473                 }
 474 
 475                 // un-escaped newline is an error
 476                 if (NL_CHARS.recognize(ch)) {
 477                     break;
 478                 }
 479 
 480                 // handle escaped char
 481                 // Note: this block does not handle the algorithm for consuming hex-digits
 482                 if (ch == '\\') {
 483 
 484                     ch = readChar();
 485 
 486                     if (NL_CHARS.recognize(ch)) {
 487 
 488                         // consume newline
 489                         while(NL_CHARS.recognize(ch)) {
 490                             ch = readChar();
 491                         }
 492 
 493                     } else if (ch != Token.EOF) {
 494                         // if EOF, do nothing
 495                         text.append((char)ch);
 496                         ch = readChar();
 497                     }
 498 
 499                     continue;
 500                 }
 501 
 502                 text.append((char)ch);
 503                 ch = readChar();
 504 
 505             }
 506 
 507             if (ch == endQuote) {
 508 
 509                 ch = readChar();
 510                 while(WS_CHARS.recognize(ch)) {
 511                     ch = readChar();
 512                 }
 513 
 514                 // After consuming white-space, the char has to be rparen or EOF. Error otherwise.
 515                 if (ch == ')') {
 516                     // consume the rparen
 517                     ch = readChar();
 518                     return URL;
 519                 }
 520 
 521                 if(ch == Token.EOF) {
 522                     return URL;
 523                 }
 524             }
 525 
 526         } else {
 527 
 528             // TODO: a lot of repeat code from above
 529             text.append((char)ch);
 530             ch = readChar();
 531 
 532             while (true) {
 533 
 534                 while (WS_CHARS.recognize(ch)) {
 535                     ch = readChar();
 536                 }
 537 
 538                 if (ch == ')') {
 539                     // consume the rparen
 540                     ch = readChar();
 541                     return URL;
 542                 }
 543 
 544                 if (ch == Token.EOF) {
 545                     return URL;
 546                 }
 547 
 548                 // handle escaped char
 549                 // Note: this block does not handle the algorithm for consuming hex-digits
 550                 if (ch == '\\') {
 551 
 552                     ch = readChar();
 553 
 554                     if (NL_CHARS.recognize(ch)) {
 555 
 556                         // consume newline
 557                         while(NL_CHARS.recognize(ch)) {
 558                             ch = readChar();
 559                         }
 560 
 561                     } else if (ch != Token.EOF) {
 562                         // if EOF, do nothing
 563                         text.append((char)ch);
 564                         ch = readChar();
 565                     }
 566 
 567                     continue;
 568                 }
 569 
 570                 if (ch == '\'' || ch == '"' || ch == '(') {
 571                     break;
 572                 }
 573 
 574                 text.append((char)ch);
 575                 ch = readChar();
 576 
 577             }
 578         }
 579 
 580         // if we get to here, then the token is bad
 581         // consume up to rparen or eof
 582         while(true) {
 583             int lastCh = ch;
 584             if (ch == Token.EOF) {
 585                 return Token.EOF;
 586             } else if (ch == ')' && lastCh != '\\') {
 587                 ch = readChar();
 588                 return Token.INVALID;
 589             }
 590 
 591             lastCh = ch;
 592             ch = readChar();
 593         }
 594 
 595     }
 596 
 597     private class UnitsState extends LexerState {
 598 
 599         private final Recognizer[][] units = {
 600         
 601             // TODO: all units from http://www.w3.org/TR/css3-values/
 602             // If units are added, getType and unitsMask must be updated!
 603             { C, M },
 604             { D, E, G },
 605             { E, M },
 606             { E, X },
 607             { G, R, A, D },
 608             { I, N },
 609             { M, M },
 610             { M, S },
 611             { P, C },
 612             { P, T },
 613             { P, X },
 614             { R, A, D },
 615             { S },
 616             { T, U, R, N },
 617             { (c) -> c == '%'}
 618         };
 619         
 620         // One bit per unit
 621         private int unitsMask = 0x7FFF;
 622 
 623         // Offset into inner array of units
 624         private int index = -1;
 625         
 626         UnitsState() {
 627             super(-1, "UnitsState", null);            
 628         }
 629         
 630         @Override
 631         public int getType() {
 632             
 633             int type = Token.INVALID;
 634                 
 635             // Must keep this in sync with units array.
 636             // Small switch will be faster than Math.log(oldMask)/Math.log(2) 
 637             switch (unitsMask) {
 638                 case 0x1: type = CM; break;
 639                 case 0x2: type = DEG; break;
 640                 case 0x4: type = EMS; break;
 641                 case 0x8: type = EXS; break;
 642                 case 0x10: type = GRAD; break;
 643                 case 0x20: type = IN; break;
 644                 case 0x40: type = MM; break;
 645                 case 0x80: type = MS; break;
 646                 case 0x100: type = PC; break;
 647                 case 0x200: type = PT; break;
 648                 case 0x400: type = PX; break;
 649                 case 0x800: type = RAD; break;
 650                 case 0x1000: type = SECONDS; break;
 651                 case 0x2000: type = TURN; break;
 652                 case 0x4000: type = PERCENTAGE; break;
 653                 default: type = Token.INVALID;
 654             }
 655              
 656             // reset
 657             unitsMask = 0x7fff;
 658             index = -1;
 659             
 660             return type;
 661         }
 662 
 663         @Override
 664         public boolean accepts(int c) {
 665             
 666             // Ensure that something bogus like '10xyzzy' is 
 667             // consumed as a token by only returning false
 668             // if the char is not alpha or %
 669             if (!ALPHA.recognize(c) && c != '%') {
 670                 return false;
 671             }
 672             
 673             // If unitsMask is zero, then we've already figured out that 
 674             // this is an invalid token, but we want to accept c so that 
 675             // '10xyzzy' is consumed as a token, albeit an invalid one.
 676             if (unitsMask == 0) return true;
 677             
 678             index += 1;
 679 
 680             for (int n=0 ; n < units.length; n++) {
 681                 
 682                 final int u = 1 << n;
 683                 
 684                 // the unit at this index already failed. Move on.
 685                 if ((unitsMask & u) == 0) continue;
 686 
 687                 if ((index >= units[n].length) || !(units[n][index].recognize(c))) {
 688                     // not a match, turn off this bit
 689                     unitsMask &= ~u;
 690                 }
 691                     
 692             }
 693 
 694 
 695             return true;
 696         }
 697 
 698     }
 699         
 700     private  void skipComment() throws IOException {
 701         while(ch != -1) {
 702             if (ch == '*') {
 703                 ch = readChar();
 704                 if (ch == '/') {
 705                     offset = pos;
 706                     ch=readChar();
 707                     break;
 708                 }
 709             } else {
 710                 ch = readChar();
 711             }
 712         }
 713     }
 714 
 715     private void skipEOL() throws IOException {
 716 
 717         int lastc = ch;
 718 
 719         while (ch != -1) {
 720 
 721             ch = readChar();
 722 
 723             // EOL is cr, lf, or crlf
 724             if ((ch == '\n') || (lastc == '\r' && ch != '\n')) {
 725                     break;
 726             }
 727         }
 728 
 729     }
 730 
 731     private int pos = 0;
 732     private int offset = 0;
 733     private int line = 1;
 734     private int lastc = -1;
 735 
 736     private int readChar() throws IOException {
 737 
 738         int c = reader.read();
 739 
 740         // only reset line and pos counters after having read a NL since
 741         // a NL token is created after the readChar
 742         if (lastc == '\n' || (lastc == '\r' && c != '\n')) {
 743             // set pos to 1 since we've already read the first char of the new line
 744             pos = 1; 
 745             offset = 0;
 746             line++;
 747         } else {
 748             pos++;
 749         }
 750         
 751         lastc = c;
 752         return c;
 753     }
 754 
 755     public Token nextToken() {
 756 
 757         Token tok = null;
 758         if (token != null) {
 759             tok = token;
 760             if (token.getType() != Token.EOF) token = null;
 761         } else {
 762             do {
 763                 tok = getToken();
 764             } while (tok != null &&
 765 //                     tok.getType() != Token.EOF &&
 766                      Token.SKIP_TOKEN.equals(tok));
 767         }
 768 
 769         // reset text buffer and currentState
 770         text.delete(0,text.length());
 771         currentState = initState;
 772 
 773         return tok;
 774     }
 775 
 776     private Token getToken() {
 777 
 778         try {
 779             while (true) {
 780                 charNotConsumed = false;
 781 
 782                 final LexerState[] reachableStates =
 783                         currentState != null ? stateMap.get(currentState) : null;
 784 
 785                 final int max = reachableStates != null ? reachableStates.length : 0;
 786 
 787                 LexerState newState = null;
 788                 for (int n=0; n<max && newState == null; n++) {
 789                     final LexerState reachableState = reachableStates[n];
 790                     if (reachableState.accepts(ch)) {
 791                         newState = reachableState;
 792                     }
 793                 }
 794 
 795                 if (newState != null) {
 796 
 797                     // Some reachable state was reached. Keep going until
 798                     // the char isn't accepted by any state
 799                     currentState = newState;
 800                     text.append((char)ch);
 801                     ch = readChar();
 802                     continue;
 803 
 804                 } else {
 805 
 806                     // If none of the reachable states accepts the char,
 807                     // then see if there is a token.
 808 
 809                     final int type = currentState != null ? currentState.getType() : Token.INVALID;
 810 
 811                     //
 812                     // If the token is INVALID and
 813                     // the currentState is something other than initState, then
 814                     // there is an error, so return INVALID.
 815                      //
 816                     if (type != Token.INVALID ||
 817                         !currentState.equals(initState)) {
 818 
 819                         final String str = text.toString();
 820                         Token tok = new Token(type, str, line, offset);
 821                         // because the next char has already been read, 
 822                         // the next token starts at pos-1
 823                         offset = pos-1;
 824 
 825                         // return here, but the next char has already been read.
 826                         return tok;
 827 
 828                     }
 829                 }
 830 
 831                 // The char wasn't accepted and there was no previous token.
 832                 switch (ch) {
 833 
 834                     case -1:
 835                         token = Token.EOF_TOKEN;
 836                         return token;
 837 
 838                     case '"':
 839                     case '\'':
 840 
 841                         text.append((char)ch);
 842                         final int endq = ch;
 843                         while((ch=readChar()) != -1) {
 844                             text.append((char)ch);
 845                             if (ch == endq) break;
 846                         }
 847 
 848                         if (ch != -1) {
 849                             token = new Token(STRING, text.toString(), line, offset);
 850                             offset = pos;
 851                         } else {
 852                             token = new Token(Token.INVALID, text.toString(), line, offset);
 853                             offset = pos;
 854                         }
 855                         break;
 856 
 857                     case '/':
 858                         ch = readChar();
 859                         if (ch == '*') {
 860                             skipComment();
 861                              if (ch != -1) {
 862                                 continue;
 863                             } else {
 864                                 token = Token.EOF_TOKEN;
 865                                 return token;
 866                             }
 867                         } else if (ch == '/') {
 868                             skipEOL();
 869                             if (ch != -1) {
 870                                 continue;
 871                             } else {
 872                                 token = Token.EOF_TOKEN;
 873                                 return token;
 874                             }
 875                         } else {
 876                             // not a comment - a SOLIDUS
 877                             token = new Token(SOLIDUS,"/", line, offset);
 878                             offset = pos;
 879                             charNotConsumed = true;
 880                         }
 881                         break;
 882 
 883                     case '>':
 884 
 885                         token = new Token(GREATER,">", line, offset);
 886                         offset = pos;
 887                         break;
 888 
 889                     case '{':
 890                         token = new Token(LBRACE,"{", line, offset);
 891                         offset = pos;
 892                         break;
 893 
 894                     case '}':
 895                         token = new Token(RBRACE,"}", line, offset);
 896                         offset = pos;
 897                         break;
 898 
 899                     case ';':
 900                         token = new Token(SEMI,";", line, offset);
 901                         offset = pos;
 902                         break;
 903 
 904                     case ':':
 905                         token = new Token(COLON,":", line, offset);
 906                         offset = pos;
 907                         break;
 908 
 909                     case '*':
 910                         token = new Token(STAR,"*", line, offset);
 911                         offset = pos;
 912                         break;
 913 
 914                     case '(':
 915                         token = new Token(LPAREN,"(", line, offset);
 916                         offset = pos;
 917                         break;
 918 
 919                     case ')':
 920                         token = new Token(RPAREN,")", line, offset);
 921                         offset = pos;
 922                         break;
 923 
 924                     case ',':
 925                         token = new Token(COMMA,",", line, offset);
 926                         offset = pos;
 927                         break;
 928 
 929                     case '.':
 930                         token = new Token(DOT,".", line, offset);
 931                         offset = pos;
 932                         break;
 933 
 934                     case ' ':
 935                     case '\t':
 936                     case '\f':
 937                         token = new Token(WS, Character.toString((char)ch), line, offset);
 938                         offset = pos;
 939                         break;
 940 
 941 
 942                     case '\r':
 943                         token = new Token(NL, "\\r", line, offset);
 944                         // offset and pos are reset on next readChar
 945                         
 946                         ch = readChar();
 947                         if (ch == '\n') {
 948                             token = new Token(NL, "\\r\\n", line, offset);
 949                             // offset and pos are reset on next readChar
 950                         } else {
 951                             // The next character has already been read, so
 952                             // return the NL token here (avoid the readChar
 953                             // at the end of the loop below)
 954                             final Token tok = token;
 955                             token = (ch == -1) ? Token.EOF_TOKEN : null;
 956                             return tok;
 957                         }                        
 958                         break;
 959 
 960                     case '\n':
 961                         token = new Token(NL, "\\n", line, offset);
 962                         // offset and pos are reset on next readChar
 963                         break;
 964 
 965                     case '!':
 966                         Token tok = scanImportant();
 967                         return tok;
 968 
 969                     case '@':
 970                         token = new Token(AT_KEYWORD, "@", line, offset);
 971                         offset = pos;
 972                         break;
 973 
 974                     default:
 975 //                      System.err.println("hit default case: ch = " + Character.toString((char)ch));
 976                         token = new Token(Token.INVALID, Character.toString((char)ch), line, offset);
 977                         offset = pos;
 978                         break;
 979                 }
 980 
 981                 if (token == null) {
 982 //                    System.err.println("token is null! ch = " + Character.toString((char)ch));
 983                     token = new Token(Token.INVALID, null, line, offset);
 984                     offset = pos;
 985                 } else if (token.getType() == Token.EOF) {
 986                     return token;
 987                 } 
 988 
 989                 if (ch != -1 && !charNotConsumed) ch = readChar();
 990 
 991                 final Token tok = token;
 992                 token = null;
 993                 return tok;
 994             }
 995         } catch (IOException ioe) {
 996             token = Token.EOF_TOKEN;
 997             return token;
 998         }
 999     }
1000     
1001     private int ch; // last value returned by readChar(); -1 means end of input
1002     private boolean charNotConsumed = false; // set when ch was read as look-ahead (e.g. after a lone '/'), so the readChar() before returning a token is skipped
1003     private Reader reader; // NOTE(review): presumably the input that readChar() pulls from - confirm
1004     private Token token; // token being built; nulled when handed to the caller, or left as Token.EOF_TOKEN at end of input / on IOException
1005     private final Map<LexerState, LexerState[]> stateMap; // NOTE(review): presumably maps each state to the states reachable from it - confirm against the constructor
1006     private LexerState currentState; // NOTE(review): presumably the state the scanner is currently in - confirm
1007     private final StringBuilder text; // accumulates the characters of the token being scanned (e.g. quoted strings)
1008 
1009 }