1 /*
   2  * Copyright (c) 2010, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package javafx.css;
  27 
  28 import java.io.IOException;
  29 import java.io.Reader;
  30 import java.util.HashMap;
  31 import java.util.Map;
  32 
  33 import com.sun.javafx.css.parser.LexerState;
  34 import com.sun.javafx.css.parser.Recognizer;
  35 import com.sun.javafx.css.parser.Token;
  36 
  37     
  38 final class CssLexer {
  39     final static int STRING = 10;
  40     final static int IDENT = 11;
  41     final static int FUNCTION = 12;
  42     final static int NUMBER = 13;
  43     final static int CM = 14;
  44     final static int EMS = 15;
  45     final static int EXS = 16;
  46     final static int IN = 17;
  47     final static int MM = 18;
  48     final static int PC = 19;
  49     final static int PT = 20;
  50     final static int PX = 21;
  51     final static int PERCENTAGE = 22;
  52     final static int DEG = 23;
  53     final static int GRAD = 24;
  54     final static int RAD = 25;
  55     final static int TURN = 26;
  56     final static int GREATER = 27;
  57     final static int LBRACE = 28;
  58     final static int RBRACE = 29;
  59     final static int SEMI = 30;
  60     final static int COLON = 31;
  61     final static int SOLIDUS = 32;
  62     final static int STAR = 33;
  63     final static int LPAREN = 34;
  64     final static int RPAREN = 35;
  65     final static int COMMA = 36;
  66     final static int HASH = 37;
  67     final static int DOT = 38;
  68     final static int IMPORTANT_SYM = 39;
  69     final static int WS = 40;
  70     final static int NL = 41;
  71     final static int FONT_FACE = 42;
  72     final static int URL = 43;
  73     final static int IMPORT = 44;
  74     final static int SECONDS = 45;
  75     final static int MS = 46;
  76     final static int AT_KEYWORD = 47;
  77 
  78     private final Recognizer A = (c) -> c == 'a' || c == 'A';
  79     private final Recognizer B = (c) -> c == 'b' || c == 'B';
  80     private final Recognizer C = (c) -> c == 'c' || c == 'C';
  81     private final Recognizer D = (c) -> c == 'd' || c == 'D';
  82     private final Recognizer E = (c) -> c == 'e' || c == 'E';
  83     private final Recognizer F = (c) -> c == 'f' || c == 'F';
  84     private final Recognizer G = (c) -> c == 'g' || c == 'G';
  85     private final Recognizer H = (c) -> c == 'h' || c == 'H';
  86     private final Recognizer I = (c) -> c == 'i' || c == 'I';
  87     private final Recognizer J = (c) -> c == 'j' || c == 'J';
  88     private final Recognizer K = (c) -> c == 'k' || c == 'K';
  89     private final Recognizer L = (c) -> c == 'l' || c == 'L';
  90     private final Recognizer M = (c) -> c == 'm' || c == 'M';
  91     private final Recognizer N = (c) -> c == 'n' || c == 'N';
  92     private final Recognizer O = (c) -> c == 'o' || c == 'O';
  93     private final Recognizer P = (c) -> c == 'p' || c == 'P';
  94     private final Recognizer Q = (c) -> c == 'q' || c == 'Q';
  95     private final Recognizer R = (c) -> c == 'r' || c == 'R';
  96     private final Recognizer S = (c) -> c == 's' || c == 'S';
  97     private final Recognizer T = (c) -> c == 't' || c == 'T';
  98     private final Recognizer U = (c) -> c == 'u' || c == 'U';
  99     private final Recognizer V = (c) -> c == 'v' || c == 'V';
 100     private final Recognizer W = (c) -> c == 'w' || c == 'W';
 101     private final Recognizer X = (c) -> c == 'x' || c == 'X';
 102     private final Recognizer Y = (c) -> c == 'y' || c == 'Y';
 103     private final Recognizer Z = (c) -> c == 'z' || c == 'Z';
 104     private final Recognizer ALPHA =  (c) -> ('a' <= c && c <= 'z') ||
 105            ('A' <= c && c <= 'Z');
 106 
 107     private final Recognizer NON_ASCII = (c) -> '\u0080' <= c && c <= '\uFFFF';
 108 
 109     private final Recognizer DOT_CHAR =        (c) -> c == '.';
 110     private final Recognizer GREATER_CHAR =    (c) -> c == '>';
 111     private final Recognizer LBRACE_CHAR =     (c) -> c == '{';
 112     private final Recognizer RBRACE_CHAR =     (c) -> c == '}';
 113     private final Recognizer SEMI_CHAR  =      (c) -> c == ';';
 114     private final Recognizer COLON_CHAR =      (c) -> c == ':';
 115     private final Recognizer SOLIDUS_CHAR =    (c) -> c == '/';
 116     private final Recognizer MINUS_CHAR =      (c) -> c == '-';
 117     private final Recognizer PLUS_CHAR =       (c) -> c == '+';
 118     private final Recognizer STAR_CHAR =       (c) -> c == '*';
 119     private final Recognizer LPAREN_CHAR =     (c) -> c == '(';
 120     private final Recognizer RPAREN_CHAR =     (c) -> c == ')';
 121     private final Recognizer COMMA_CHAR =      (c) -> c == ',';
 122     private final Recognizer UNDERSCORE_CHAR = (c) -> c == '_';
 123     private final Recognizer HASH_CHAR =       (c) -> c == '#';
 124 
 125     private final Recognizer WS_CHARS = (c) -> c == ' '  ||
 126            c == '\t' ||
 127            c == '\r' ||
 128            c == '\n' ||
 129            c == '\f';
 130     private final Recognizer NL_CHARS = (c) -> (c == '\r' || c == '\n');
 131 
 132     private final Recognizer DIGIT = (c) -> '0' <= c && c <= '9';
 133 
 134     private final Recognizer HEX_DIGIT = (c) -> ('0' <= c && c <= '9') ||
 135            ('a' <= c && c <= 'f') ||
 136            ('A' <= c && c <= 'F');
 137 
 138     // The initial accepts any character
 139     final LexerState initState = new LexerState("initState", null) {
 140         @Override public boolean accepts(int c) { return true; }
 141     };
 142 
 143     final LexerState hashState = new LexerState("hashState",
 144         HASH_CHAR
 145     );
 146 
 147     final LexerState minusState = new LexerState("minusState",
 148         MINUS_CHAR
 149     );
 150 
 151     final LexerState plusState = new LexerState("plusState",
 152         PLUS_CHAR
 153     );
 154 
 155     // The dot char is either just a dot or may be the start of a number
 156     final LexerState dotState = new LexerState(DOT, "dotState",
 157         DOT_CHAR
 158     );
 159 
 160     // [_a-z]|{nonascii}|{escape}
 161     final LexerState nmStartState = new LexerState(IDENT, "nmStartState",
 162         UNDERSCORE_CHAR, ALPHA
 163     );
 164 
 165     // nmchar           [_a-z0-9-]|{nonascii}|{escape}
 166     final LexerState nmCharState = new LexerState(IDENT, "nmCharState",
 167         UNDERSCORE_CHAR, ALPHA, DIGIT, MINUS_CHAR
 168     );
 169 
 170     // same as nmchar, but need to differentiate between nmchar in ident and
 171     // nmchar in
 172     final LexerState hashNameCharState = new LexerState(HASH, "hashNameCharState",
 173         UNDERSCORE_CHAR, ALPHA, DIGIT, MINUS_CHAR
 174     );
 175 
 176     // lparen after ident implies function
 177     final LexerState lparenState = new LexerState(FUNCTION, "lparenState",
 178         LPAREN_CHAR
 179     ) {
 180         @Override public int getType() {
 181 
 182             if (text.indexOf("url(") == 0) {
 183                 try {
 184                     return consumeUrl();
 185                 } catch (IOException ioe) {
 186                     return Token.INVALID;
 187                 }
 188             }
 189             return super.getType();
 190         }
 191     };
 192 
 193 
 194     // initial digits in a number
 195     final LexerState leadingDigitsState = new LexerState(NUMBER,"leadingDigitsState",
 196         DIGIT
 197     );
 198 
 199     // If the dot char follows leading digits, a plus or a minus, then it is
 200     // a decimal mark
 201     final LexerState decimalMarkState = new LexerState("decimalMarkState",
 202         DOT_CHAR
 203     );
 204 
 205     // digits following decimal mark
 206     final LexerState trailingDigitsState = new LexerState(NUMBER,"trailingDigitsState",
 207         DIGIT
 208     );
 209     
 210     // http://www.w3.org/TR/css3-values/
 211     final LexerState unitsState = new UnitsState();
 212 
 213     private Map<LexerState, LexerState[]> createStateMap() {
 214 
 215         Map<LexerState, LexerState[]> map =
 216                 new HashMap<LexerState, LexerState[]>();
 217 
 218         // initState -- [#] --> hashState
 219         // initState -- [-] --> minusState
 220         // initState -- [+] --> plusState
 221         // initState -- [_a-z] --> nmStartState
 222         // initState -- [0-9] --> leadingDigitsState
 223         // initState -- [.] --> dotState
 224         map.put(
 225                 initState,
 226                 new LexerState[] {
 227                     hashState,
 228                     minusState,
 229                     nmStartState,
 230                     plusState,
 231                     minusState,
 232                     leadingDigitsState,
 233                     dotState
 234                 }
 235         );
 236 
 237         // minus could be the start of an ident or a number
 238         // minusState -- [_a-z] --> nmStartState
 239         // minusState -- [0-9] --> leadingDigitsState
 240         // minusState -- [.] --> decimalMarkState
 241         map.put(
 242                 minusState,
 243                 new LexerState[] {
 244                     nmStartState,
 245                     leadingDigitsState,
 246                     decimalMarkState,
 247                 }
 248         );
 249 
 250         //
 251         // # {name}
 252         // hash {nmchar}+
 253         // hashState -- [_a-z0-9-] --> nmCharState
 254         // nmCharState -- [_a-z0-9-] --> nmCharState
 255         //
 256         map.put(
 257                 hashState,
 258                 new LexerState[] {
 259                     hashNameCharState
 260                 }
 261         );
 262 
 263         map.put(
 264                 hashNameCharState,
 265                 new LexerState[] {
 266                     hashNameCharState,
 267                 }
 268         );
 269 
 270 
 271         //
 272         // {ident}
 273         // ident '-'? {nmchar}+
 274         // nmStartState -- [_a-z0-9-] --> nmCharState
 275         // nmCharState -- [_a-z0-9-] --> nmCharState
 276         // nmCharState -- [(] --> lparenState
 277         //
 278         map.put(
 279                 nmStartState,
 280                 new LexerState[] {
 281                     nmCharState
 282                 }
 283         );
 284 
 285         map.put(
 286                 nmCharState,
 287                 new LexerState[] {
 288                     nmCharState,
 289                     lparenState
 290                 }
 291         );
 292 
 293         // from +/- state, next state must be a digit or a dot
 294         map.put(
 295                 plusState,
 296                 new LexerState[] {
 297                     leadingDigitsState,
 298                     decimalMarkState
 299                 }
 300         );
 301 
 302         // from leadingDigitsState, next state must be
 303         // another digit, a decimal mark, or units
 304         map.put(
 305                 leadingDigitsState,
 306                 new LexerState[] {
 307                     leadingDigitsState,
 308                     decimalMarkState,
 309                     unitsState
 310                 }
 311         );
 312 
 313         // from decimal mark, next state must be a digit.
 314         // Need to map both dotState and decimalMarkState
 315         // since dot might be the first character and would
 316         // not be seen as a decimal point.
 317         map.put(
 318                 dotState,
 319                 new LexerState[] {
 320                     trailingDigitsState
 321                 }
 322         );
 323 
 324         map.put(
 325                 decimalMarkState,
 326                 new LexerState[] {
 327                     trailingDigitsState
 328                 }
 329         );
 330 
 331         // from trailingDigitsState, next state must be another digit or units
 332         map.put(
 333                 trailingDigitsState,
 334                 new LexerState[] {
 335                     trailingDigitsState,
 336                     unitsState,
 337                 }
 338         );
 339 
 340         // UnitsState stays in UnitsState
 341         map.put(
 342                 unitsState,
 343                 new LexerState[] {
 344                     unitsState
 345                 }
 346         );
 347 
 348         return map;
 349     }
 350 
 351     CssLexer() {
 352         this.stateMap = createStateMap();
 353         this.text = new StringBuilder(64);
 354         this.currentState = initState;
 355     }
 356 
 357     void setReader(Reader reader) {
 358         this.reader = reader;
 359         lastc = -1;
 360         pos = offset = 0;
 361         line = 1;
 362         this.currentState = initState;
 363         this.token = null;
 364         try {
 365             this.ch = readChar();
 366         } catch (IOException ioe) {
 367             token = Token.EOF_TOKEN;
 368         }
 369     }
 370 
 371     private Token scanImportant()  throws IOException{
 372         // CSS 2.1 grammar for important_sym
 373         // "!"({w}|{comment})*{I}{M}{P}{O}{R}{T}{A}{N}{T}
 374         final Recognizer[] important_sym =
 375                 new Recognizer[] { I, M, P, O, R, T, A, N, T };
 376         int current = 0;
 377         
 378         text.append((char)ch);
 379         
 380         // get past the '!'
 381         ch = readChar();
 382        
 383         while(true) {
 384             
 385             switch (ch) {
 386 
 387                 case Token.EOF:
 388                     token = Token.EOF_TOKEN;
 389                     return token;
 390 
 391                 case '/':                    
 392                     ch = readChar();
 393                     if (ch == '*') skipComment();
 394                     else if (ch == '/') skipEOL();
 395                     else {
 396                         text.append('/').append((char)ch);
 397                         int temp = offset;
 398                         offset = pos;
 399                         return new Token(Token.INVALID, text.toString(), line, temp);
 400                     }
 401                     break;
 402 
 403                 case ' ':
 404                 case '\t':
 405                 case '\r':
 406                 case '\n':
 407                 case '\f':
 408                     ch = readChar();
 409                     break;
 410 
 411                 default:
 412                     boolean accepted = true;
 413                     while(accepted && current < important_sym.length) {
 414                         accepted = important_sym[current++].recognize(ch);
 415                         text.append((char)ch);
 416                         ch = readChar();
 417                     }
 418                     if (accepted) {
 419                         final int temp = offset;
 420                         offset = pos-1; // will have read one char too many
 421                         return new Token(IMPORTANT_SYM, "!important", line, temp);
 422                     } else {
 423                         while (ch != ';' &&
 424                                ch != '}' &&
 425                                ch != Token.EOF) {
 426                             ch = readChar();
 427                         }
 428                         if (ch != Token.EOF) {
 429                             final int temp = offset;
 430                             offset = pos-1; // will have read one char too many
 431                             return new Token(Token.SKIP, text.toString(), line, temp);
 432                         } else {
 433                             return Token.EOF_TOKEN;
 434                         }
 435                     }
 436             }
 437         }
 438     }
 439 
 440     // http://www.ietf.org/rfc/rfc3986
 441     // http://www.w3.org/TR/2011/REC-CSS2-20110607/syndata.html#uri
 442     // http://www.w3.org/TR/css3-syntax/#consume-a-url-token
 443     private int consumeUrl() throws IOException {
 444 
 445         text.delete(0, text.length());
 446 
 447         // skip initial white space
 448         while (WS_CHARS.recognize(ch)) {
 449             ch = readChar();
 450         }
 451 
 452         if (ch == Token.EOF) {
 453             return Token.EOF;
 454         }
 455 
 456         if (ch == '\'' || ch == '"') {
 457 
 458             int endQuote = ch;
 459 
 460             ch = readChar();
 461 
 462             // consume the string
 463             while (ch != endQuote) {
 464 
 465                 if (ch == Token.EOF) {
 466                     break;
 467                 }
 468 
 469                 // un-escaped newline is an error
 470                 if (NL_CHARS.recognize(ch)) {
 471                     break;
 472                 }
 473 
 474                 // handle escaped char
 475                 // Note: this block does not handle the algorithm for consuming hex-digits
 476                 if (ch == '\\') {
 477 
 478                     ch = readChar();
 479 
 480                     if (NL_CHARS.recognize(ch)) {
 481 
 482                         // consume newline
 483                         while(NL_CHARS.recognize(ch)) {
 484                             ch = readChar();
 485                         }
 486 
 487                     } else if (ch != Token.EOF) {
 488                         // if EOF, do nothing
 489                         text.append((char)ch);
 490                         ch = readChar();
 491                     }
 492 
 493                     continue;
 494                 }
 495 
 496                 text.append((char)ch);
 497                 ch = readChar();
 498 
 499             }
 500 
 501             if (ch == endQuote) {
 502 
 503                 ch = readChar();
 504                 while(WS_CHARS.recognize(ch)) {
 505                     ch = readChar();
 506                 }
 507 
 508                 // After consuming white-space, the char has to be rparen or EOF. Error otherwise.
 509                 if (ch == ')') {
 510                     // consume the rparen
 511                     ch = readChar();
 512                     return URL;
 513                 }
 514 
 515                 if(ch == Token.EOF) {
 516                     return URL;
 517                 }
 518             }
 519 
 520         } else {
 521 
 522             // TODO: a lot of repeat code from above
 523             text.append((char)ch);
 524             ch = readChar();
 525 
 526             while (true) {
 527 
 528                 while (WS_CHARS.recognize(ch)) {
 529                     ch = readChar();
 530                 }
 531 
 532                 if (ch == ')') {
 533                     // consume the rparen
 534                     ch = readChar();
 535                     return URL;
 536                 }
 537 
 538                 if (ch == Token.EOF) {
 539                     return URL;
 540                 }
 541 
 542                 // handle escaped char
 543                 // Note: this block does not handle the algorithm for consuming hex-digits
 544                 if (ch == '\\') {
 545 
 546                     ch = readChar();
 547 
 548                     if (NL_CHARS.recognize(ch)) {
 549 
 550                         // consume newline
 551                         while(NL_CHARS.recognize(ch)) {
 552                             ch = readChar();
 553                         }
 554 
 555                     } else if (ch != Token.EOF) {
 556                         // if EOF, do nothing
 557                         text.append((char)ch);
 558                         ch = readChar();
 559                     }
 560 
 561                     continue;
 562                 }
 563 
 564                 if (ch == '\'' || ch == '"' || ch == '(') {
 565                     break;
 566                 }
 567 
 568                 text.append((char)ch);
 569                 ch = readChar();
 570 
 571             }
 572         }
 573 
 574         // if we get to here, then the token is bad
 575         // consume up to rparen or eof
 576         while(true) {
 577             int lastCh = ch;
 578             if (ch == Token.EOF) {
 579                 return Token.EOF;
 580             } else if (ch == ')' && lastCh != '\\') {
 581                 ch = readChar();
 582                 return Token.INVALID;
 583             }
 584 
 585             lastCh = ch;
 586             ch = readChar();
 587         }
 588 
 589     }
 590 
 591     private class UnitsState extends LexerState {
 592 
 593         private final Recognizer[][] units = {
 594         
 595             // TODO: all units from http://www.w3.org/TR/css3-values/
 596             // If units are added, getType and unitsMask must be updated!
 597             { C, M },
 598             { D, E, G },
 599             { E, M },
 600             { E, X },
 601             { G, R, A, D },
 602             { I, N },
 603             { M, M },
 604             { M, S },
 605             { P, C },
 606             { P, T },
 607             { P, X },
 608             { R, A, D },
 609             { S },
 610             { T, U, R, N },
 611             { (c) -> c == '%'}
 612         };
 613         
 614         // One bit per unit
 615         private int unitsMask = 0x7FFF;
 616 
 617         // Offset into inner array of units
 618         private int index = -1;
 619         
 620         UnitsState() {
 621             super(-1, "UnitsState", null);            
 622         }
 623         
 624         @Override
 625         public int getType() {
 626             
 627             int type = Token.INVALID;
 628                 
 629             // Must keep this in sync with units array.
 630             // Small switch will be faster than Math.log(oldMask)/Math.log(2) 
 631             switch (unitsMask) {
 632                 case 0x1: type = CM; break;
 633                 case 0x2: type = DEG; break;
 634                 case 0x4: type = EMS; break;
 635                 case 0x8: type = EXS; break;
 636                 case 0x10: type = GRAD; break;
 637                 case 0x20: type = IN; break;
 638                 case 0x40: type = MM; break;
 639                 case 0x80: type = MS; break;
 640                 case 0x100: type = PC; break;
 641                 case 0x200: type = PT; break;
 642                 case 0x400: type = PX; break;
 643                 case 0x800: type = RAD; break;
 644                 case 0x1000: type = SECONDS; break;
 645                 case 0x2000: type = TURN; break;
 646                 case 0x4000: type = PERCENTAGE; break;
 647                 default: type = Token.INVALID;
 648             }
 649              
 650             // reset
 651             unitsMask = 0x7fff;
 652             index = -1;
 653             
 654             return type;
 655         }
 656 
 657         @Override
 658         public boolean accepts(int c) {
 659             
 660             // Ensure that something bogus like '10xyzzy' is 
 661             // consumed as a token by only returning false
 662             // if the char is not alpha or %
 663             if (!ALPHA.recognize(c) && c != '%') {
 664                 return false;
 665             }
 666             
 667             // If unitsMask is zero, then we've already figured out that 
 668             // this is an invalid token, but we want to accept c so that 
 669             // '10xyzzy' is consumed as a token, albeit an invalid one.
 670             if (unitsMask == 0) return true;
 671             
 672             index += 1;
 673 
 674             for (int n=0 ; n < units.length; n++) {
 675                 
 676                 final int u = 1 << n;
 677                 
 678                 // the unit at this index already failed. Move on.
 679                 if ((unitsMask & u) == 0) continue;
 680 
 681                 if ((index >= units[n].length) || !(units[n][index].recognize(c))) {
 682                     // not a match, turn off this bit
 683                     unitsMask &= ~u;
 684                 }
 685                     
 686             }
 687 
 688 
 689             return true;
 690         }
 691 
 692     }
 693         
 694     private  void skipComment() throws IOException {
 695         while(ch != -1) {
 696             if (ch == '*') {
 697                 ch = readChar();
 698                 if (ch == '/') {
 699                     offset = pos;
 700                     ch=readChar();
 701                     break;
 702                 }
 703             } else {
 704                 ch = readChar();
 705             }
 706         }
 707     }
 708 
 709     private void skipEOL() throws IOException {
 710 
 711         int lastc = ch;
 712 
 713         while (ch != -1) {
 714 
 715             ch = readChar();
 716 
 717             // EOL is cr, lf, or crlf
 718             if ((ch == '\n') || (lastc == '\r' && ch != '\n')) {
 719                     break;
 720             }
 721         }
 722 
 723     }
 724 
 725     private int pos = 0;
 726     private int offset = 0;
 727     private int line = 1;
 728     private int lastc = -1;
 729 
 730     private int readChar() throws IOException {
 731 
 732         int c = reader.read();
 733 
 734         // only reset line and pos counters after having read a NL since
 735         // a NL token is created after the readChar
 736         if (lastc == '\n' || (lastc == '\r' && c != '\n')) {
 737             // set pos to 1 since we've already read the first char of the new line
 738             pos = 1; 
 739             offset = 0;
 740             line++;
 741         } else {
 742             pos++;
 743         }
 744         
 745         lastc = c;
 746         return c;
 747     }
 748 
 749     Token nextToken() {
 750 
 751         Token tok = null;
 752         if (token != null) {
 753             tok = token;
 754             if (token.getType() != Token.EOF) token = null;
 755         } else {
 756             do {
 757                 tok = getToken();
 758             } while (tok != null &&
 759 //                     tok.getType() != Token.EOF &&
 760                      Token.SKIP_TOKEN.equals(tok));
 761         }
 762 
 763         // reset text buffer and currentState
 764         text.delete(0,text.length());
 765         currentState = initState;
 766 
 767         return tok;
 768     }
 769 
 770     private Token getToken() {
 771 
 772         try {
 773             while (true) {
 774                 charNotConsumed = false;
 775 
 776                 final LexerState[] reachableStates =
 777                         currentState != null ? stateMap.get(currentState) : null;
 778 
 779                 final int max = reachableStates != null ? reachableStates.length : 0;
 780 
 781                 LexerState newState = null;
 782                 for (int n=0; n<max && newState == null; n++) {
 783                     final LexerState reachableState = reachableStates[n];
 784                     if (reachableState.accepts(ch)) {
 785                         newState = reachableState;
 786                     }
 787                 }
 788 
 789                 if (newState != null) {
 790 
 791                     // Some reachable state was reached. Keep going until
 792                     // the char isn't accepted by any state
 793                     currentState = newState;
 794                     text.append((char)ch);
 795                     ch = readChar();
 796                     continue;
 797 
 798                 } else {
 799 
 800                     // If none of the reachable states accepts the char,
 801                     // then see if there is a token.
 802 
 803                     final int type = currentState != null ? currentState.getType() : Token.INVALID;
 804 
 805                     //
 806                     // If the token is INVALID and
 807                     // the currentState is something other than initState, then
 808                     // there is an error, so return INVALID.
 809                      //
 810                     if (type != Token.INVALID ||
 811                         !currentState.equals(initState)) {
 812 
 813                         final String str = text.toString();
 814                         Token tok = new Token(type, str, line, offset);
 815                         // because the next char has already been read, 
 816                         // the next token starts at pos-1
 817                         offset = pos-1;
 818 
 819                         // return here, but the next char has already been read.
 820                         return tok;
 821 
 822                     }
 823                 }
 824 
 825                 // The char wasn't accepted and there was no previous token.
 826                 switch (ch) {
 827 
 828                     case -1:
 829                         token = Token.EOF_TOKEN;
 830                         return token;
 831 
 832                     case '"':
 833                     case '\'':
 834 
 835                         text.append((char)ch);
 836                         final int endq = ch;
 837                         while((ch=readChar()) != -1) {
 838                             text.append((char)ch);
 839                             if (ch == endq) break;
 840                         }
 841 
 842                         if (ch != -1) {
 843                             token = new Token(STRING, text.toString(), line, offset);
 844                             offset = pos;
 845                         } else {
 846                             token = new Token(Token.INVALID, text.toString(), line, offset);
 847                             offset = pos;
 848                         }
 849                         break;
 850 
 851                     case '/':
 852                         ch = readChar();
 853                         if (ch == '*') {
 854                             skipComment();
 855                              if (ch != -1) {
 856                                 continue;
 857                             } else {
 858                                 token = Token.EOF_TOKEN;
 859                                 return token;
 860                             }
 861                         } else if (ch == '/') {
 862                             skipEOL();
 863                             if (ch != -1) {
 864                                 continue;
 865                             } else {
 866                                 token = Token.EOF_TOKEN;
 867                                 return token;
 868                             }
 869                         } else {
 870                             // not a comment - a SOLIDUS
 871                             token = new Token(SOLIDUS,"/", line, offset);
 872                             offset = pos;
 873                             charNotConsumed = true;
 874                         }
 875                         break;
 876 
 877                     case '>':
 878 
 879                         token = new Token(GREATER,">", line, offset);
 880                         offset = pos;
 881                         break;
 882 
 883                     case '{':
 884                         token = new Token(LBRACE,"{", line, offset);
 885                         offset = pos;
 886                         break;
 887 
 888                     case '}':
 889                         token = new Token(RBRACE,"}", line, offset);
 890                         offset = pos;
 891                         break;
 892 
 893                     case ';':
 894                         token = new Token(SEMI,";", line, offset);
 895                         offset = pos;
 896                         break;
 897 
 898                     case ':':
 899                         token = new Token(COLON,":", line, offset);
 900                         offset = pos;
 901                         break;
 902 
 903                     case '*':
 904                         token = new Token(STAR,"*", line, offset);
 905                         offset = pos;
 906                         break;
 907 
 908                     case '(':
 909                         token = new Token(LPAREN,"(", line, offset);
 910                         offset = pos;
 911                         break;
 912 
 913                     case ')':
 914                         token = new Token(RPAREN,")", line, offset);
 915                         offset = pos;
 916                         break;
 917 
 918                     case ',':
 919                         token = new Token(COMMA,",", line, offset);
 920                         offset = pos;
 921                         break;
 922 
 923                     case '.':
 924                         token = new Token(DOT,".", line, offset);
 925                         offset = pos;
 926                         break;
 927 
 928                     case ' ':
 929                     case '\t':
 930                     case '\f':
 931                         token = new Token(WS, Character.toString((char)ch), line, offset);
 932                         offset = pos;
 933                         break;
 934 
 935 
 936                     case '\r':
 937                         token = new Token(NL, "\\r", line, offset);
 938                         // offset and pos are reset on next readChar
 939                         
 940                         ch = readChar();
 941                         if (ch == '\n') {
 942                             token = new Token(NL, "\\r\\n", line, offset);
 943                             // offset and pos are reset on next readChar
 944                         } else {
                            // The next character has already been read, so
                            // return the NL token here (avoiding the readChar
                            // at the end of the loop below).
 948                             final Token tok = token;
 949                             token = (ch == -1) ? Token.EOF_TOKEN : null;
 950                             return tok;
 951                         }                        
 952                         break;
 953 
 954                     case '\n':
 955                         token = new Token(NL, "\\n", line, offset);
 956                         // offset and pos are reset on next readChar
 957                         break;
 958 
 959                     case '!':
 960                         Token tok = scanImportant();
 961                         return tok;
 962 
 963                     case '@':
 964                         token = new Token(AT_KEYWORD, "@", line, offset);
 965                         offset = pos;
 966                         break;
 967 
 968                     default:
 969 //                      System.err.println("hit default case: ch = " + Character.toString((char)ch));
 970                         token = new Token(Token.INVALID, Character.toString((char)ch), line, offset);
 971                         offset = pos;
 972                         break;
 973                 }
 974 
 975                 if (token == null) {
 976 //                    System.err.println("token is null! ch = " + Character.toString((char)ch));
 977                     token = new Token(Token.INVALID, null, line, offset);
 978                     offset = pos;
 979                 } else if (token.getType() == Token.EOF) {
 980                     return token;
 981                 } 
 982 
 983                 if (ch != -1 && !charNotConsumed) ch = readChar();
 984 
 985                 final Token tok = token;
 986                 token = null;
 987                 return tok;
 988             }
 989         } catch (IOException ioe) {
 990             token = Token.EOF_TOKEN;
 991             return token;
 992         }
 993     }
 994     
    // Most recently read character, as returned by readChar().
    // A value of -1 marks end of input.
    private int ch;
    // Set when the scanner has already read the character that follows the
    // token being returned (for example the character after a lone '/'), so
    // the read-ahead at the end of token scanning is skipped for it.
    // NOTE(review): where this flag is cleared again is not visible in this
    // part of the file - confirm it is reset before the next token is scanned.
    private boolean charNotConsumed = false;
    // Character source for the lexer.
    // NOTE(review): presumably what readChar() reads from - confirm.
    private Reader reader;
    // Token currently being produced. It is cleared to null once a token has
    // been handed back to the caller, and holds Token.EOF_TOKEN once end of
    // input has been reached.
    private Token token;
    // Maps a LexerState to an array of LexerStates.
    // NOTE(review): presumably the states reachable from the key state -
    // confirm against the code that populates the map.
    private final Map<LexerState, LexerState[]> stateMap;
    // NOTE(review): presumably the state the lexer's state machine is
    // currently in - confirm against the scanning loop.
    private LexerState currentState;
    // Accumulates the characters of the token being scanned (for example the
    // contents of a quoted string, including its quotes).
    private final StringBuilder text;
1002 
1003 }