1 /*
   2  * Permission is hereby granted, free of charge, to any person obtaining a copy of
   3  * this software and associated documentation files (the "Software"), to deal in
   4  * the Software without restriction, including without limitation the rights to
   5  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
   6  * of the Software, and to permit persons to whom the Software is furnished to do
   7  * so, subject to the following conditions:
   8  *
   9  * The above copyright notice and this permission notice shall be included in all
  10  * copies or substantial portions of the Software.
  11  *
  12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  15  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  16  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  17  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  18  * SOFTWARE.
  19  */
  20 package jdk.nashorn.internal.runtime.regexp.joni;
  21 
  22 import static jdk.nashorn.internal.runtime.regexp.joni.Option.isSingleline;
  23 import static jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode.isRepeatInfinite;
  24 import jdk.nashorn.internal.runtime.regexp.joni.ast.QuantifierNode;
  25 import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
  26 import jdk.nashorn.internal.runtime.regexp.joni.constants.MetaChar;
  27 import jdk.nashorn.internal.runtime.regexp.joni.constants.TokenType;
  28 import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
  29 import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
  30 import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
  31 import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
  32 
  33 class Lexer extends ScannerSupport {
  34     protected final ScanEnvironment env;
  35     protected final Syntax syntax;              // fast access to syntax
  36     protected final Token token = new Token();  // current token
  37 
  38     protected Lexer(final ScanEnvironment env, final char[] chars, final int p, final int end) {
  39         super(chars, p, end);
  40         this.env = env;
  41         this.syntax = env.syntax;
  42     }
  43 
  44     /**
  45      * @return 0: normal {n,m}, 2: fixed {n}
  46      * !introduce returnCode here
  47      */
  48     private int fetchRangeQuantifier() {
  49         mark();
  50         final boolean synAllow = syntax.allowInvalidInterval();
  51 
  52         if (!left()) {
  53             if (synAllow) {
  54                 return 1; /* "....{" : OK! */
  55             }
  56             throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
  57         }
  58 
  59         if (!synAllow) {
  60             c = peek();
  61             if (c == ')' || c == '(' || c == '|') {
  62                 throw new SyntaxException(ERR_END_PATTERN_AT_LEFT_BRACE);
  63             }
  64         }
  65 
  66         int low = scanUnsignedNumber();
  67         if (low < 0) {
  68             throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
  69         }
  70         if (low > Config.MAX_REPEAT_NUM) {
  71             throw new SyntaxException(ErrorMessages.ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
  72         }
  73 
  74         boolean nonLow = false;
  75         if (p == _p) { /* can't read low */
  76             if (syntax.allowIntervalLowAbbrev()) {
  77                 low = 0;
  78                 nonLow = true;
  79             } else {
  80                 return invalidRangeQuantifier(synAllow);
  81             }
  82         }
  83 
  84         if (!left()) {
  85             return invalidRangeQuantifier(synAllow);
  86         }
  87 
  88         fetch();
  89         int up;
  90         int ret = 0;
  91         if (c == ',') {
  92             final int prev = p; // ??? last
  93             up = scanUnsignedNumber();
  94             if (up < 0) {
  95                 throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
  96             }
  97             if (up > Config.MAX_REPEAT_NUM) {
  98                 throw new ValueException(ERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE);
  99             }
 100 
 101             if (p == prev) {
 102                 if (nonLow) {
 103                     return invalidRangeQuantifier(synAllow);
 104                 }
 105                 up = QuantifierNode.REPEAT_INFINITE; /* {n,} : {n,infinite} */
 106             }
 107         } else {
 108             if (nonLow) {
 109                 return invalidRangeQuantifier(synAllow);
 110             }
 111             unfetch();
 112             up = low; /* {n} : exact n times */
 113             ret = 2; /* fixed */
 114         }
 115 
 116         if (!left()) {
 117             return invalidRangeQuantifier(synAllow);
 118         }
 119         fetch();
 120 
 121         if (syntax.opEscBraceInterval()) {
 122             if (c != syntax.metaCharTable.esc) {
 123                 return invalidRangeQuantifier(synAllow);
 124             }
 125             fetch();
 126         }
 127 
 128         if (c != '}') {
 129             return invalidRangeQuantifier(synAllow);
 130         }
 131 
 132         if (!isRepeatInfinite(up) && low > up) {
 133             throw new ValueException(ERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE);
 134         }
 135 
 136         token.type = TokenType.INTERVAL;
 137         token.setRepeatLower(low);
 138         token.setRepeatUpper(up);
 139 
 140         return ret; /* 0: normal {n,m}, 2: fixed {n} */
 141     }
 142 
 143     private int invalidRangeQuantifier(final boolean synAllow) {
 144         if (synAllow) {
 145             restore();
 146             return 1;
 147         }
 148         throw new SyntaxException(ERR_INVALID_REPEAT_RANGE_PATTERN);
 149     }
 150 
 151     @SuppressWarnings("fallthrough")
 152     /* \M-, \C-, \c, or \... */
 153     private int fetchEscapedValue() {
 154         if (!left()) {
 155             throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
 156         }
 157         fetch();
 158 
 159         switch(c) {
 160 
 161         case 'M':
 162             if (syntax.op2EscCapitalMBarMeta()) {
 163                 if (!left()) {
 164                     throw new SyntaxException(ERR_END_PATTERN_AT_META);
 165                 }
 166                 fetch();
 167                 if (c != '-') {
 168                     throw new SyntaxException(ERR_META_CODE_SYNTAX);
 169                 }
 170                 if (!left()) {
 171                     throw new SyntaxException(ERR_END_PATTERN_AT_META);
 172                 }
 173                 fetch();
 174                 if (c == syntax.metaCharTable.esc) {
 175                     c = fetchEscapedValue();
 176                 }
 177                 c = ((c & 0xff) | 0x80);
 178             } else {
 179                 fetchEscapedValueBackSlash();
 180             }
 181             break;
 182 
 183         case 'C':
 184             if (syntax.op2EscCapitalCBarControl()) {
 185                 if (!left()) {
 186                     throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL);
 187                 }
 188                 fetch();
 189                 if (c != '-') {
 190                     throw new SyntaxException(ERR_CONTROL_CODE_SYNTAX);
 191                 }
 192                 fetchEscapedValueControl();
 193             } else {
 194                 fetchEscapedValueBackSlash();
 195             }
 196             break;
 197 
 198         case 'c':
 199             if (syntax.opEscCControl()) {
 200                 fetchEscapedValueControl();
 201             }
 202             /* fall through */
 203 
 204         default:
 205             fetchEscapedValueBackSlash();
 206         } // switch
 207 
 208         return c; // ???
 209     }
 210 
 211     private void fetchEscapedValueBackSlash() {
 212         c = env.convertBackslashValue(c);
 213     }
 214 
 215     private void fetchEscapedValueControl() {
 216         if (!left()) {
 217             throw new SyntaxException(ERR_END_PATTERN_AT_CONTROL);
 218         }
 219         fetch();
 220         if (c == '?') {
 221             c = 0177;
 222         } else {
 223             if (c == syntax.metaCharTable.esc) {
 224                 c = fetchEscapedValue();
 225             }
 226             c &= 0x9f;
 227         }
 228     }
 229 
 230     private void fetchTokenInCCFor_charType(final boolean flag, final int type) {
 231         token.type = TokenType.CHAR_TYPE;
 232         token.setPropCType(type);
 233         token.setPropNot(flag);
 234     }
 235 
 236     private void fetchTokenInCCFor_x() {
 237         if (!left()) {
 238             return;
 239         }
 240         final int last = p;
 241 
 242         if (peekIs('{') && syntax.opEscXBraceHex8()) {
 243             inc();
 244             final int num = scanUnsignedHexadecimalNumber(8);
 245             if (num < 0) {
 246                 throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
 247             }
 248             if (left()) {
 249                 final int c2 = peek();
 250                 if (EncodingHelper.isXDigit(c2)) {
 251                     throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
 252                 }
 253             }
 254 
 255             if (p > last + 1 && left() && peekIs('}')) {
 256                 inc();
 257                 token.type = TokenType.CODE_POINT;
 258                 token.setCode(num);
 259             } else {
 260                 /* can't read nothing or invalid format */
 261                 p = last;
 262             }
 263         } else if (syntax.opEscXHex2()) {
 264             int num = scanUnsignedHexadecimalNumber(2);
 265             if (num < 0) {
 266                 throw new ValueException(ERR_TOO_BIG_NUMBER);
 267             }
 268             if (p == last) { /* can't read nothing. */
 269                 num = 0; /* but, it's not error */
 270             }
 271             token.type = TokenType.RAW_BYTE;
 272             token.setC(num);
 273         }
 274     }
 275 
 276     private void fetchTokenInCCFor_u() {
 277         if (!left()) {
 278             return;
 279         }
 280         final int last = p;
 281 
 282         if (syntax.op2EscUHex4()) {
 283             int num = scanUnsignedHexadecimalNumber(4);
 284             if (num < 0) {
 285                 throw new ValueException(ERR_TOO_BIG_NUMBER);
 286             }
 287             if (p == last) {  /* can't read nothing. */
 288                 num = 0; /* but, it's not error */
 289             }
 290             token.type = TokenType.CODE_POINT;
 291             token.setCode(num);
 292         }
 293     }
 294 
 295     private void fetchTokenInCCFor_digit() {
 296         if (syntax.opEscOctal3()) {
 297             unfetch();
 298             final int last = p;
 299             int num = scanUnsignedOctalNumber(3);
 300             if (num < 0) {
 301                 throw new ValueException(ERR_TOO_BIG_NUMBER);
 302             }
 303             if (p == last) {  /* can't read nothing. */
 304                 num = 0; /* but, it's not error */
 305             }
 306             token.type = TokenType.RAW_BYTE;
 307             token.setC(num);
 308         }
 309     }
 310 
 311     private void fetchTokenInCCFor_and() {
 312         if (syntax.op2CClassSetOp() && left() && peekIs('&')) {
 313             inc();
 314             token.type = TokenType.CC_AND;
 315         }
 316     }
 317 
 318     protected final TokenType fetchTokenInCC() {
 319         if (!left()) {
 320             token.type = TokenType.EOT;
 321             return token.type;
 322         }
 323 
 324         fetch();
 325         token.type = TokenType.CHAR;
 326         token.setC(c);
 327         token.escaped = false;
 328 
 329         if (c == ']') {
 330             token.type = TokenType.CC_CLOSE;
 331         } else if (c == '-') {
 332             token.type = TokenType.CC_RANGE;
 333         } else if (c == syntax.metaCharTable.esc) {
 334             if (!syntax.backSlashEscapeInCC()) {
 335                 return token.type;
 336             }
 337             if (!left()) {
 338                 throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
 339             }
 340             fetch();
 341             token.escaped = true;
 342             token.setC(c);
 343 
 344             switch (c) {
 345             case 'w':
 346                 fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
 347                 break;
 348             case 'W':
 349                 fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
 350                 break;
 351             case 'd':
 352                 fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
 353                 break;
 354             case 'D':
 355                 fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
 356                 break;
 357             case 's':
 358                 fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
 359                 break;
 360             case 'S':
 361                 fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
 362                 break;
 363             case 'h':
 364                 if (syntax.op2EscHXDigit()) {
 365                     fetchTokenInCCFor_charType(false, CharacterType.XDIGIT);
 366                 }
 367                 break;
 368             case 'H':
 369                 if (syntax.op2EscHXDigit()) {
 370                     fetchTokenInCCFor_charType(true, CharacterType.XDIGIT);
 371                 }
 372                 break;
 373             case 'x':
 374                 fetchTokenInCCFor_x();
 375                 break;
 376             case 'u':
 377                 fetchTokenInCCFor_u();
 378                 break;
 379             case '0':
 380             case '1':
 381             case '2':
 382             case '3':
 383             case '4':
 384             case '5':
 385             case '6':
 386             case '7':
 387                 fetchTokenInCCFor_digit();
 388                 break;
 389 
 390             default:
 391                 unfetch();
 392                 final int num = fetchEscapedValue();
 393                 if (token.getC() != num) {
 394                     token.setCode(num);
 395                     token.type = TokenType.CODE_POINT;
 396                 }
 397                 break;
 398             } // switch
 399 
 400         } else if (c == '&') {
 401             fetchTokenInCCFor_and();
 402         }
 403         return token.type;
 404     }
 405 
 406     private void fetchTokenFor_repeat(final int lower, final int upper) {
 407         token.type = TokenType.OP_REPEAT;
 408         token.setRepeatLower(lower);
 409         token.setRepeatUpper(upper);
 410         greedyCheck();
 411     }
 412 
 413     private void fetchTokenFor_openBrace() {
 414         switch (fetchRangeQuantifier()) {
 415         case 0:
 416             greedyCheck();
 417             break;
 418         case 2:
 419             if (syntax.fixedIntervalIsGreedyOnly()) {
 420                 possessiveCheck();
 421             } else {
 422                 greedyCheck();
 423             }
 424             break;
 425         default: /* 1 : normal char */
 426         } // inner switch
 427     }
 428 
 429     private void fetchTokenFor_anchor(final int subType) {
 430         token.type = TokenType.ANCHOR;
 431         token.setAnchor(subType);
 432     }
 433 
 434     private void fetchTokenFor_xBrace() {
 435         if (!left()) {
 436             return;
 437         }
 438 
 439         final int last = p;
 440         if (peekIs('{') && syntax.opEscXBraceHex8()) {
 441             inc();
 442             final int num = scanUnsignedHexadecimalNumber(8);
 443             if (num < 0) {
 444                 throw new ValueException(ERR_TOO_BIG_WIDE_CHAR_VALUE);
 445             }
 446             if (left()) {
 447                 if (EncodingHelper.isXDigit(peek())) {
 448                     throw new ValueException(ERR_TOO_LONG_WIDE_CHAR_VALUE);
 449                 }
 450             }
 451 
 452             if (p > last + 1 && left() && peekIs('}')) {
 453                 inc();
 454                 token.type = TokenType.CODE_POINT;
 455                 token.setCode(num);
 456             } else {
 457                 /* can't read nothing or invalid format */
 458                 p = last;
 459             }
 460         } else if (syntax.opEscXHex2()) {
 461             int num = scanUnsignedHexadecimalNumber(2);
 462             if (num < 0) {
 463                 throw new ValueException(ERR_TOO_BIG_NUMBER);
 464             }
 465             if (p == last) { /* can't read nothing. */
 466                 num = 0; /* but, it's not error */
 467             }
 468             token.type = TokenType.RAW_BYTE;
 469             token.setC(num);
 470         }
 471     }
 472 
 473     private void fetchTokenFor_uHex() {
 474         if (!left()) {
 475             return;
 476         }
 477         final int last = p;
 478 
 479         if (syntax.op2EscUHex4()) {
 480             int num = scanUnsignedHexadecimalNumber(4);
 481             if (num < 0) {
 482                 throw new ValueException(ERR_TOO_BIG_NUMBER);
 483             }
 484             if (p == last) { /* can't read nothing. */
 485                 num = 0; /* but, it's not error */
 486             }
 487             token.type = TokenType.CODE_POINT;
 488             token.setCode(num);
 489         }
 490     }
 491 
 492     private void fetchTokenFor_digit() {
 493         unfetch();
 494         final int last = p;
 495         final int num = scanUnsignedNumber();
 496         if (num < 0 || num > Config.MAX_BACKREF_NUM) { // goto skip_backref
 497         } else if (syntax.opDecimalBackref() && (num <= env.numMem || num <= 9)) { /* This spec. from GNU regex */
 498             if (syntax.strictCheckBackref()) {
 499                 if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) {
 500                     throw new ValueException(ERR_INVALID_BACKREF);
 501                 }
 502             }
 503             token.type = TokenType.BACKREF;
 504             token.setBackrefRef(num);
 505             return;
 506         }
 507 
 508         if (c == '8' || c == '9') { /* normal char */ // skip_backref:
 509             p = last;
 510             inc();
 511             return;
 512         }
 513         p = last;
 514 
 515         fetchTokenFor_zero(); /* fall through */
 516     }
 517 
 518     private void fetchTokenFor_zero() {
 519         if (syntax.opEscOctal3()) {
 520             final int last = p;
 521             int num = scanUnsignedOctalNumber(c == '0' ? 2 : 3);
 522             if (num < 0) {
 523                 throw new ValueException(ERR_TOO_BIG_NUMBER);
 524             }
 525             if (p == last) { /* can't read nothing. */
 526                 num = 0; /* but, it's not error */
 527             }
 528             token.type = TokenType.RAW_BYTE;
 529             token.setC(num);
 530         } else if (c != '0') {
 531             inc();
 532         }
 533     }
 534 
 535     private void fetchTokenFor_metaChars() {
 536         if (c == syntax.metaCharTable.anyChar) {
 537             token.type = TokenType.ANYCHAR;
 538         } else if (c == syntax.metaCharTable.anyTime) {
 539             fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
 540         }  else if (c == syntax.metaCharTable.zeroOrOneTime) {
 541             fetchTokenFor_repeat(0, 1);
 542         } else if (c == syntax.metaCharTable.oneOrMoreTime) {
 543             fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
 544         } else if (c == syntax.metaCharTable.anyCharAnyTime) {
 545             token.type = TokenType.ANYCHAR_ANYTIME;
 546             // goto out
 547         }
 548     }
 549 
    /**
     * Fetches the next token outside a character class and stores it in {@link #token}.
     *
     * The token starts out as a STRING token holding the fetched character; the switches
     * below upgrade it to a more specific type only when the matching syntax switch is on.
     * Group comments introduced by "(?#" and, when the extend option is set, whitespace and
     * '#' line comments are skipped by restarting the loop via {@code continue start}.
     *
     * @return the type of the fetched token; EOT when no input is left
     * @throws SyntaxException if the pattern ends right after an escape character or inside
     *         a "(?#" comment
     */
    protected final TokenType fetchToken() {
        // mark(); // out
        start:
        while(true) {
            if (!left()) {
                token.type = TokenType.EOT;
                return token.type;
            }

            token.type = TokenType.STRING;
            token.backP = p;

            fetch();

            if (c == syntax.metaCharTable.esc && !syntax.op2IneffectiveEscape()) { // IS_MC_ESC_CODE(code, syn)
                if (!left()) {
                    throw new SyntaxException(ERR_END_PATTERN_AT_ESCAPE);
                }

                // backP now records the position of the character that follows the escape
                token.backP = p;
                fetch();

                token.setC(c);
                token.escaped = true;
                switch(c) {

                case '*':
                    if (syntax.opEscAsteriskZeroInf()) {
                        fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
                    }
                    break;
                case '+':
                    if (syntax.opEscPlusOneInf()) {
                        fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
                    }
                    break;
                case '?':
                    if (syntax.opEscQMarkZeroOne()) {
                        fetchTokenFor_repeat(0, 1);
                    }
                    break;
                case '{':
                    if (syntax.opEscBraceInterval()) {
                        fetchTokenFor_openBrace();
                    }
                    break;
                case '|':
                    if (syntax.opEscVBarAlt()) {
                        token.type = TokenType.ALT;
                    }
                    break;
                case '(':
                    if (syntax.opEscLParenSubexp()) {
                        token.type = TokenType.SUBEXP_OPEN;
                    }
                    break;
                case ')':
                    if (syntax.opEscLParenSubexp()) {
                        token.type = TokenType.SUBEXP_CLOSE;
                    }
                    break;
                case 'w':
                    if (syntax.opEscWWord()) {
                        fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
                    }
                    break;
                case 'W':
                    if (syntax.opEscWWord()) {
                        fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD);
                    }
                    break;
                case 'b':
                    if (syntax.opEscBWordBound()) {
                        fetchTokenFor_anchor(AnchorType.WORD_BOUND);
                    }
                    break;
                case 'B':
                    if (syntax.opEscBWordBound()) {
                        fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND);
                    }
                    break;
                case '<':
                    if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) {
                        fetchTokenFor_anchor(AnchorType.WORD_BEGIN);
                    }
                    break;
                case '>':
                    if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) {
                        fetchTokenFor_anchor(AnchorType.WORD_END);
                    }
                    break;
                case 's':
                    if (syntax.opEscSWhiteSpace()) {
                        fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
                    }
                    break;
                case 'S':
                    if (syntax.opEscSWhiteSpace()) {
                        fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE);
                    }
                    break;
                case 'd':
                    if (syntax.opEscDDigit()) {
                        fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
                    }
                    break;
                case 'D':
                    if (syntax.opEscDDigit()) {
                        fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.D : CharacterType.DIGIT);
                    }
                    break;
                case 'h':
                    if (syntax.op2EscHXDigit()) {
                        fetchTokenInCCFor_charType(false, CharacterType.XDIGIT);
                    }
                    break;
                case 'H':
                    if (syntax.op2EscHXDigit()) {
                        fetchTokenInCCFor_charType(true, CharacterType.XDIGIT);
                    }
                    break;
                case 'A':
                    if (syntax.opEscAZBufAnchor()) {
                        fetchTokenFor_anchor(AnchorType.BEGIN_BUF);
                    }
                    break;
                case 'Z':
                    if (syntax.opEscAZBufAnchor()) {
                        fetchTokenFor_anchor(AnchorType.SEMI_END_BUF);
                    }
                    break;
                case 'z':
                    if (syntax.opEscAZBufAnchor()) {
                        fetchTokenFor_anchor(AnchorType.END_BUF);
                    }
                    break;
                case 'G':
                    if (syntax.opEscCapitalGBeginAnchor()) {
                        fetchTokenFor_anchor(AnchorType.BEGIN_POSITION);
                    }
                    break;
                case '`':
                    if (syntax.op2EscGnuBufAnchor()) {
                        fetchTokenFor_anchor(AnchorType.BEGIN_BUF);
                    }
                    break;
                case '\'':
                    if (syntax.op2EscGnuBufAnchor()) {
                        fetchTokenFor_anchor(AnchorType.END_BUF);
                    }
                    break;
                case 'x':
                    fetchTokenFor_xBrace();
                    break;
                case 'u':
                    fetchTokenFor_uHex();
                    break;
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    fetchTokenFor_digit();
                    break;
                case '0':
                    fetchTokenFor_zero();
                    break;

                default:
                    // step back so fetchEscapedValue() re-reads the escaped character itself
                    unfetch();
                    final int num = fetchEscapedValue();

                    /* set_raw: */
                    if (token.getC() != num) {
                        // the escape resolved to a different value than the literal character
                        token.type = TokenType.CODE_POINT;
                        token.setCode(num);
                    } else { /* string */
                        // value unchanged: reset the position to one past the recorded backP
                        p = token.backP + 1;
                    }
                    break;

                } // switch (c)

            } else {
                token.setC(c);
                token.escaped = false;

                if (Config.USE_VARIABLE_META_CHARS && (c != MetaChar.INEFFECTIVE_META_CHAR && syntax.opVariableMetaCharacters())) {
                    fetchTokenFor_metaChars();
                    // this break leaves the while loop, skipping the fixed meta-character switch
                    break;
                }

                {
                    switch(c) {
                    case '.':
                        if (syntax.opDotAnyChar()) {
                            token.type = TokenType.ANYCHAR;
                        }
                        break;
                    case '*':
                        if (syntax.opAsteriskZeroInf()) {
                            fetchTokenFor_repeat(0, QuantifierNode.REPEAT_INFINITE);
                        }
                        break;
                    case '+':
                        if (syntax.opPlusOneInf()) {
                            fetchTokenFor_repeat(1, QuantifierNode.REPEAT_INFINITE);
                        }
                        break;
                    case '?':
                        if (syntax.opQMarkZeroOne()) {
                            fetchTokenFor_repeat(0, 1);
                        }
                        break;
                    case '{':
                        if (syntax.opBraceInterval()) {
                            fetchTokenFor_openBrace();
                        }
                        break;
                    case '|':
                        if (syntax.opVBarAlt()) {
                            token.type = TokenType.ALT;
                        }
                        break;

                    case '(':
                        if (peekIs('?') && syntax.op2QMarkGroupEffect()) {
                            inc();
                            if (peekIs('#')) {
                                // "(?#": a comment group; skip to the first unescaped ')'
                                fetch();
                                while (true) {
                                    if (!left()) {
                                        throw new SyntaxException(ERR_END_PATTERN_IN_GROUP);
                                    }
                                    fetch();
                                    if (c == syntax.metaCharTable.esc) {
                                        // an escaped character cannot close the comment
                                        if (left()) {
                                            fetch();
                                        }
                                    } else {
                                        if (c == ')') {
                                            break;
                                        }
                                    }
                                }
                                continue start; // goto start
                            }
                            // not a comment: undo the inc() that stepped over '?'
                            unfetch();
                        }

                        if (syntax.opLParenSubexp()) {
                            token.type = TokenType.SUBEXP_OPEN;
                        }
                        break;
                    case ')':
                        if (syntax.opLParenSubexp()) {
                            token.type = TokenType.SUBEXP_CLOSE;
                        }
                        break;
                    case '^':
                        if (syntax.opLineAnchor()) {
                            fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.BEGIN_BUF : AnchorType.BEGIN_LINE);
                        }
                        break;
                    case '$':
                        if (syntax.opLineAnchor()) {
                            fetchTokenFor_anchor(isSingleline(env.option) ? AnchorType.END_BUF : AnchorType.END_LINE);
                        }
                        break;
                    case '[':
                        if (syntax.opBracketCC()) {
                            token.type = TokenType.CC_CC_OPEN;
                        }
                        break;
                    case ']':
                        //if (*src > env->pattern)   /* /].../ is allowed. */
                        //CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
                        break;
                    case '#':
                        if (Option.isExtend(env.option)) {
                            // extend option: '#' starts a comment that runs to the next newline
                            while (left()) {
                                fetch();
                                if (EncodingHelper.isNewLine(c)) {
                                    break;
                                }
                            }
                            continue start; // goto start
                        }
                        break;

                    case ' ':
                    case '\t':
                    case '\n':
                    case '\r':
                    case '\f':
                        // extend option: whitespace is skipped
                        if (Option.isExtend(env.option))
                         {
                            continue start; // goto start
                        }
                        break;

                    default: // string
                        break;

                    } // switch
                }
            }

            break;
        } // while
        return token.type;
    }
 866 
 867     private void greedyCheck() {
 868         if (left() && peekIs('?') && syntax.opQMarkNonGreedy()) {
 869 
 870             fetch();
 871 
 872             token.setRepeatGreedy(false);
 873             token.setRepeatPossessive(false);
 874         } else {
 875             possessiveCheck();
 876         }
 877     }
 878 
 879     private void possessiveCheck() {
 880         if (left() && peekIs('+') &&
 881             (syntax.op2PlusPossessiveRepeat() && token.type != TokenType.INTERVAL ||
 882              syntax.op2PlusPossessiveInterval() && token.type == TokenType.INTERVAL)) {
 883 
 884             fetch();
 885 
 886             token.setRepeatGreedy(true);
 887             token.setRepeatPossessive(true);
 888         } else {
 889             token.setRepeatGreedy(true);
 890             token.setRepeatPossessive(false);
 891         }
 892     }
 893 
 894     protected final void syntaxWarn(final String message, final char ch) {
 895         syntaxWarn(message.replace("<%n>", Character.toString(ch)));
 896     }
 897 
 898     protected final void syntaxWarn(final String message) {
 899         if (Config.USE_WARN) {
 900             env.reg.warnings.warn(message + ": /" + new String(chars, getBegin(), getEnd()) + "/");
 901         }
 902     }
 903 }